We look for genes associated with fibroblast activation in several single-cell datasets.
suppressPackageStartupMessages({
# install.packages("BiocManager")
# install.packages("kableExtra")
requireNamespace("kableExtra")
# ?
# devtools::install_github("rstudio/crosstalk")
# library("crosstalk")
# library("htmlwidgets")
# install.packages("crunch")
# requireNamespace("crunch")
# https://rstudio.github.io/DT/
# install.packages("DT")
requireNamespace("DT")
# install.packages("RCurl")
requireNamespace("RCurl")
# install.packages("memoise")
requireNamespace("memoise")
# install.packages("dplyr")
library(dplyr)
# install.packages("ggplot2")
library(ggplot2)
# install.packages("ggsci")
requireNamespace("ggsci")
# install.packages("ggtext")
requireNamespace("ggtext")
# install.packages("ggrepel")
requireNamespace("ggrepel")
# install.packages("assertthat")
requireNamespace("assertthat")
#
requireNamespace("magrittr")
`%<>%` <- magrittr::`%<>%`
`%>%` <- magrittr::`%>%`
# install.packages("glue")
requireNamespace("glue")
glue <- glue::glue
# install.packages("tidyr")
requireNamespace("tidyr")
# BiocManager::install("org.Mm.eg.db")
# BiocManager::install("org.Hs.eg.db")
requireNamespace("org.Mm.eg.db")
requireNamespace("org.Hs.eg.db")
# BiocManager::install("GO.db")
requireNamespace("GO.db")
# https://www.bioconductor.org/packages/release/bioc/vignettes/goseq/inst/doc/goseq.pdf
# BiocManager::install("goseq")
# requireNamespace("goseq")
# install.packages("pracma")
requireNamespace("pracma")
# https://vroom.r-lib.org/articles/vroom.html
# install.packages("vroom")
requireNamespace("vroom")
# BiocManager::install("DESeq2")
requireNamespace("DESeq2")
# BiocManager::install("scater")
requireNamespace("scater")
# BiocManager::install("scran")
# requireNamespace("scran")
# BiocManager::install("igraph")
requireNamespace("igraph")
# install.packages("pheatmap")
requireNamespace("pheatmap")
# install.packages("gridExtra")
requireNamespace("gridExtra")
# install.packages("pathlibr")
requireNamespace("pathlibr")
# install.packages("stringr")
requireNamespace("stringr")
# install.packages("reshape2")
requireNamespace("reshape2")
# install.packages("pbapply")
# requireNamespace("pbapply")
# install.packages("Rtsne")
requireNamespace("Rtsne")
# install.packages("uwot")
requireNamespace("uwot")
# avoid importing `exprs` that leads to clashes
requireNamespace("rlang")
# install.packages("Seurat")
# requireNamespace("Seurat")
# BiocManager::install("TrajectoryUtils") # Didn't work
# devtools::install_github("LTLA/TrajectoryUtils@e943606")
# BiocManager::install("kstreet13/slingshot@25fc566")
# requireNamespace("slingshot")
# BiocManager::install("tradeSeq")
# requireNamespace("tradeSeq")
# install.packages("statmod")
# BiocManager::install("edgeR")
requireNamespace("edgeR")
# https://github.com/eliocamp/ggnewscale
# install.packages("ggnewscale")
# requireNamespace("ggnewscale")
# https://www.rdocumentation.org/packages/DGCA/versions/1.0.2
# BiocManager::install("impute")
# BiocManager::install("preprocessCore")
# BiocManager::install("GO.db")
# install.packages("WGCNA")
# install.packages("DGCA")
requireNamespace("DGCA")
# https://github.com/danielschw188/Revelio
# devtools::install_github('danielschw188/Revelio@e8e1492')
requireNamespace("Revelio")
# # install.packages("rgl")
# requireNamespace("rgl")
# install.packages("plotly")
requireNamespace("plotly")
# https://www.rdocumentation.org/packages/Hmisc/versions/4.5-0/topics/rcorr
# install.packages("Hmisc")
requireNamespace("Hmisc")
# BiocManager::install("multiGSEA")
# requireNamespace("multiGSEA")
# install.packages("tidygraph")
# install.packages("ggraph")
requireNamespace("ggraph")
requireNamespace("tidygraph")
# https://htmlpreview.github.io/?https://github.com/aertslab/SCENIC/blob/master/inst/doc/SCENIC_Setup.html
# BiocManager::install(c("AUCell", "RcisTarget"))
# BiocManager::install(c("GRNBoost"))
# BiocManager::install(c("doMC", "doRNG"))
# devtools::install_github("aertslab/SCENIC@0585e87")
# requireNamespace("SCENIC")
})
## now dyn.load("/home/ra/R/x86_64-pc-linux-gnu-library/4.1/glue/libs/glue.so") ...
## now dyn.load("/home/ra/R/x86_64-pc-linux-gnu-library/4.1/xml2/libs/xml2.so") ...
## now dyn.load("/home/ra/R/x86_64-pc-linux-gnu-library/4.1/colorspace/libs/colorspace.so") ...
## now dyn.load("/home/ra/R/x86_64-pc-linux-gnu-library/4.1/systemfonts/libs/systemfonts.so") ...
## now dyn.load("/home/ra/R/x86_64-pc-linux-gnu-library/4.1/svglite/libs/svglite.so") ...
## now dyn.load("/home/ra/R/x86_64-pc-linux-gnu-library/4.1/bitops/libs/bitops.so") ...
## now dyn.load("/home/ra/R/x86_64-pc-linux-gnu-library/4.1/RCurl/libs/RCurl.so") ...
## now dyn.load("/home/ra/R/x86_64-pc-linux-gnu-library/4.1/fastmap/libs/fastmap.so") ...
## now dyn.load("/home/ra/R/x86_64-pc-linux-gnu-library/4.1/cachem/libs/cachem.so") ...
## now dyn.load("/home/ra/R/x86_64-pc-linux-gnu-library/4.1/vctrs/libs/vctrs.so") ...
## now dyn.load("/home/ra/R/x86_64-pc-linux-gnu-library/4.1/ellipsis/libs/ellipsis.so") ...
## now dyn.load("/home/ra/R/x86_64-pc-linux-gnu-library/4.1/fansi/libs/fansi.so") ...
## now dyn.load("/home/ra/R/x86_64-pc-linux-gnu-library/4.1/utf8/libs/utf8.so") ...
## now dyn.load("/home/ra/R/x86_64-pc-linux-gnu-library/4.1/tibble/libs/tibble.so") ...
## now dyn.load("/home/ra/R/x86_64-pc-linux-gnu-library/4.1/purrr/libs/purrr.so") ...
## now dyn.load("/home/ra/R/x86_64-pc-linux-gnu-library/4.1/dplyr/libs/dplyr.so") ...
## now dyn.load("/usr/lib/R/library/grid/libs/grid.so") ...
## now dyn.load("/home/ra/R/x86_64-pc-linux-gnu-library/4.1/Rcpp/libs/Rcpp.so") ...
## now dyn.load("/home/ra/R/x86_64-pc-linux-gnu-library/4.1/gridtext/libs/gridtext.so") ...
## now dyn.load("/home/ra/R/x86_64-pc-linux-gnu-library/4.1/ggrepel/libs/ggrepel.so") ...
## now dyn.load("/home/ra/R/x86_64-pc-linux-gnu-library/4.1/tidyr/libs/tidyr.so") ...
## now dyn.load("/home/ra/R/x86_64-pc-linux-gnu-library/4.1/bit/libs/bit.so") ...
## now dyn.load("/home/ra/R/x86_64-pc-linux-gnu-library/4.1/bit64/libs/bit64.so") ...
## now dyn.load("/home/ra/R/x86_64-pc-linux-gnu-library/4.1/RSQLite/libs/RSQLite.so") ...
## now dyn.load("/usr/lib/R/library/parallel/libs/parallel.so") ...
## now dyn.load("/home/ra/R/x86_64-pc-linux-gnu-library/4.1/Biobase/libs/Biobase.so") ...
## now dyn.load("/home/ra/R/x86_64-pc-linux-gnu-library/4.1/S4Vectors/libs/S4Vectors.so") ...
## now dyn.load("/home/ra/R/x86_64-pc-linux-gnu-library/4.1/IRanges/libs/IRanges.so") ...
## now dyn.load("/home/ra/R/x86_64-pc-linux-gnu-library/4.1/png/libs/png.so") ...
## now dyn.load("/home/ra/R/x86_64-pc-linux-gnu-library/4.1/XVector/libs/XVector.so") ...
## now dyn.load("/home/ra/R/x86_64-pc-linux-gnu-library/4.1/Biostrings/libs/Biostrings.so") ...
## now dyn.load("/home/ra/R/x86_64-pc-linux-gnu-library/4.1/vroom/libs/vroom.so") ...
## now dyn.load("/home/ra/R/x86_64-pc-linux-gnu-library/4.1/tzdb/libs/tzdb.so") ...
## now dyn.load("/home/ra/R/x86_64-pc-linux-gnu-library/4.1/BiocParallel/libs/BiocParallel.so") ...
## now dyn.load("/home/ra/R/x86_64-pc-linux-gnu-library/4.1/GenomicRanges/libs/GenomicRanges.so") ...
## now dyn.load("/home/ra/R/x86_64-pc-linux-gnu-library/4.1/matrixStats/libs/matrixStats.so") ...
## now dyn.load("/usr/lib/R/library/lattice/libs/lattice.so") ...
## now dyn.load("/usr/lib/R/library/Matrix/libs/Matrix.so") ...
## now dyn.load("/home/ra/R/x86_64-pc-linux-gnu-library/4.1/DelayedArray/libs/DelayedArray.so") ...
## now dyn.load("/home/ra/R/x86_64-pc-linux-gnu-library/4.1/XML/libs/XML.so") ...
## now dyn.load("/usr/lib/R/library/splines/libs/splines.so") ...
## now dyn.load("/usr/lib/R/library/survival/libs/survival.so") ...
## now dyn.load("/home/ra/R/x86_64-pc-linux-gnu-library/4.1/genefilter/libs/genefilter.so") ...
## now dyn.load("/home/ra/R/x86_64-pc-linux-gnu-library/4.1/locfit/libs/locfit.so") ...
## now dyn.load("/home/ra/R/x86_64-pc-linux-gnu-library/4.1/DESeq2/libs/DESeq2.so") ...
## now dyn.load("/home/ra/R/x86_64-pc-linux-gnu-library/4.1/sparseMatrixStats/libs/sparseMatrixStats.so") ...
## now dyn.load("/home/ra/R/x86_64-pc-linux-gnu-library/4.1/beachmat/libs/beachmat.so") ...
## now dyn.load("/home/ra/R/x86_64-pc-linux-gnu-library/4.1/scuttle/libs/scuttle.so") ...
## now dyn.load("/home/ra/R/x86_64-pc-linux-gnu-library/4.1/BiocNeighbors/libs/BiocNeighbors.so") ...
## now dyn.load("/home/ra/R/x86_64-pc-linux-gnu-library/4.1/irlba/libs/irlba.so") ...
## now dyn.load("/home/ra/R/x86_64-pc-linux-gnu-library/4.1/BiocSingular/libs/BiocSingular.so") ...
## now dyn.load("/home/ra/R/x86_64-pc-linux-gnu-library/4.1/beeswarm/libs/beeswarm.so") ...
## now dyn.load("/home/ra/R/x86_64-pc-linux-gnu-library/4.1/igraph/libs/igraph.so") ...
## now dyn.load("/home/ra/R/x86_64-pc-linux-gnu-library/4.1/plyr/libs/plyr.so") ...
## now dyn.load("/home/ra/R/x86_64-pc-linux-gnu-library/4.1/reshape2/libs/reshape2.so") ...
## now dyn.load("/home/ra/R/x86_64-pc-linux-gnu-library/4.1/Rtsne/libs/Rtsne.so") ...
## now dyn.load("/home/ra/R/x86_64-pc-linux-gnu-library/4.1/uwot/libs/uwot.so") ...
## now dyn.load("/home/ra/R/x86_64-pc-linux-gnu-library/4.1/limma/libs/limma.so") ...
## now dyn.load("/home/ra/R/x86_64-pc-linux-gnu-library/4.1/edgeR/libs/edgeR.so") ...
## now dyn.load("/home/ra/R/x86_64-pc-linux-gnu-library/4.1/fastcluster/libs/fastcluster.so") ...
## now dyn.load("/home/ra/R/x86_64-pc-linux-gnu-library/4.1/base64enc/libs/base64enc.so") ...
## now dyn.load("/home/ra/R/x86_64-pc-linux-gnu-library/4.1/jpeg/libs/jpeg.so") ...
## now dyn.load("/usr/lib/R/library/cluster/libs/cluster.so") ...
## now dyn.load("/usr/lib/R/library/foreign/libs/foreign.so") ...
## now dyn.load("/usr/lib/R/library/nnet/libs/nnet.so") ...
## now dyn.load("/usr/lib/R/library/rpart/libs/rpart.so") ...
## now dyn.load("/home/ra/R/x86_64-pc-linux-gnu-library/4.1/checkmate/libs/checkmate.so") ...
## now dyn.load("/home/ra/R/x86_64-pc-linux-gnu-library/4.1/backports/libs/backports.so") ...
## now dyn.load("/home/ra/R/x86_64-pc-linux-gnu-library/4.1/data.table/libs/datatable.so") ...
## now dyn.load("/home/ra/R/x86_64-pc-linux-gnu-library/4.1/Hmisc/libs/Hmisc.so") ...
## now dyn.load("/home/ra/R/x86_64-pc-linux-gnu-library/4.1/impute/libs/impute.so") ...
## now dyn.load("/home/ra/R/x86_64-pc-linux-gnu-library/4.1/preprocessCore/libs/preprocessCore.so") ...
## now dyn.load("/home/ra/R/x86_64-pc-linux-gnu-library/4.1/WGCNA/libs/WGCNA.so") ...
## now dyn.load("/home/ra/R/x86_64-pc-linux-gnu-library/4.1/lazyeval/libs/lazyeval.so") ...
## now dyn.load("/home/ra/R/x86_64-pc-linux-gnu-library/4.1/tidygraph/libs/tidygraph.so") ...
sessionInfo()
## R version 4.1.0 (2021-05-18)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Ubuntu 20.04.2 LTS
##
## Matrix products: default
## BLAS: /usr/lib/x86_64-linux-gnu/blas/libblas.so.3.9.0
## LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.9.0
##
## locale:
## [1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C LC_TIME=lzh_TW
## [4] LC_COLLATE=en_US.UTF-8 LC_MONETARY=lzh_TW LC_MESSAGES=en_US.UTF-8
## [7] LC_PAPER=lzh_TW LC_NAME=C LC_ADDRESS=C
## [10] LC_TELEPHONE=C LC_MEASUREMENT=lzh_TW LC_IDENTIFICATION=C
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] ggplot2_3.3.5 dplyr_1.0.7
##
## loaded via a namespace (and not attached):
## [1] backports_1.2.1 Hmisc_4.5-0
## [3] systemfonts_1.0.2 plyr_1.8.6
## [5] igraph_1.2.6 lazyeval_0.2.2
## [7] splines_4.1.0 Revelio_0.1.0
## [9] BiocParallel_1.26.1 GenomeInfoDb_1.28.1
## [11] scater_1.20.1 digest_0.6.27
## [13] foreach_1.5.1 htmltools_0.5.1.1
## [15] viridis_0.6.1 GO.db_3.13.0
## [17] fansi_0.5.0 checkmate_2.0.0
## [19] magrittr_2.0.1 memoise_2.0.0
## [21] ScaledMatrix_1.0.0 cluster_2.1.2
## [23] doParallel_1.0.16 tzdb_0.1.1
## [25] limma_3.48.1 fastcluster_1.2.3
## [27] Biostrings_2.60.1 annotate_1.70.0
## [29] matrixStats_0.59.0 vroom_1.5.1
## [31] svglite_2.0.0 jpeg_0.1-8.1
## [33] colorspace_2.0-2 blob_1.2.1
## [35] rvest_1.0.0 ggrepel_0.9.1
## [37] xfun_0.24 crayon_1.4.1
## [39] RCurl_1.98-1.3 jsonlite_1.7.2
## [41] org.Mm.eg.db_3.13.0 genefilter_1.74.0
## [43] impute_1.66.0 survival_3.2-11
## [45] iterators_1.0.13 glue_1.4.2
## [47] kableExtra_1.3.4 gtable_0.3.0
## [49] zlibbioc_1.38.0 XVector_0.32.0
## [51] webshot_0.5.2 DelayedArray_0.18.0
## [53] BiocSingular_1.8.1 SingleCellExperiment_1.14.1
## [55] BiocGenerics_0.38.0 scales_1.1.1
## [57] pheatmap_1.0.12 DBI_1.1.1
## [59] edgeR_3.34.0 Rcpp_1.0.6
## [61] htmlTable_2.2.1 viridisLite_0.4.0
## [63] xtable_1.8-4 gridtext_0.1.4
## [65] foreign_0.8-81 bit_4.0.4
## [67] rsvd_1.0.5 preprocessCore_1.54.0
## [69] Formula_1.2-4 stats4_4.1.0
## [71] DT_0.18 htmlwidgets_1.5.3
## [73] httr_1.4.2 RColorBrewer_1.1-2
## [75] ellipsis_0.3.2 pkgconfig_2.0.3
## [77] XML_3.99-0.6 scuttle_1.2.0
## [79] nnet_7.3-16 sass_0.4.0
## [81] uwot_0.1.10 locfit_1.5-9.4
## [83] utf8_1.2.1 dynamicTreeCut_1.63-1
## [85] tidyselect_1.1.1 rlang_0.4.11
## [87] reshape2_1.4.4 AnnotationDbi_1.54.1
## [89] munsell_0.5.0 tools_4.1.0
## [91] cachem_1.0.5 generics_0.1.0
## [93] RSQLite_2.2.7 evaluate_0.14
## [95] stringr_1.4.0 fastmap_1.1.0
## [97] yaml_2.2.1 org.Hs.eg.db_3.13.0
## [99] knitr_1.33 bit64_4.0.5
## [101] tidygraph_1.2.0 purrr_0.3.4
## [103] KEGGREST_1.32.0 sparseMatrixStats_1.4.0
## [105] pracma_2.3.3 xml2_1.3.2
## [107] compiler_4.1.0 rstudioapi_0.13
## [109] plotly_4.9.4.1 beeswarm_0.4.0
## [111] png_0.1-7 tibble_3.1.2
## [113] geneplotter_1.70.0 bslib_0.2.5.1
## [115] stringi_1.6.2 lattice_0.20-44
## [117] DGCA_1.0.2 Matrix_1.3-4
## [119] ggsci_2.9 vctrs_0.3.8
## [121] pillar_1.6.1 lifecycle_1.0.0
## [123] jquerylib_0.1.4 BiocNeighbors_1.10.0
## [125] data.table_1.14.0 bitops_1.0-7
## [127] irlba_2.3.3 GenomicRanges_1.44.0
## [129] latticeExtra_0.6-29 R6_2.5.0
## [131] gridExtra_2.3 vipor_0.4.5
## [133] IRanges_2.26.0 codetools_0.2-18
## [135] assertthat_0.2.1 SummarizedExperiment_1.22.0
## [137] DESeq2_1.32.0 withr_2.4.2
## [139] S4Vectors_0.30.0 GenomeInfoDbData_1.2.6
## [141] parallel_4.1.0 ggtext_0.1.1
## [143] rpart_4.1-15 grid_4.1.0
## [145] pathlibr_0.1.0 beachmat_2.8.0
## [147] tidyr_1.1.3 rmarkdown_2.9
## [149] DelayedMatrixStats_1.14.0 MatrixGenerics_1.4.0
## [151] Rtsne_0.15 Biobase_2.52.0
## [153] WGCNA_1.70-3 base64enc_0.1-3
## [155] ggbeeswarm_0.6.0
set.seed(43)
dual <- function(f, g) {
if (missing(g)) {
stopifnot(class(f) == "function")
(function(L) do.call(f, Filter(Negate(is.null), L)))
} else {
stopifnot(class(f) == "list")
stopifnot(class(g) == "function")
dual(g)(f)
}
}
Example 1:
list(
biggle = list(A = "hello", B = "world"),
wiggle = list(A = "Hello", B = "World", C = NULL)
) %>%
lapply(dual(function(B, A) paste(A, B)))
## $biggle
## [1] "hello world"
##
## $wiggle
## [1] "Hello World"
Example 2:
list(
A = data.frame(x = c("xa1", "xa2"), y = c("ya1", "ya2")),
B = data.frame(x = c("xb1", "xb2"), y = c("yb1", "yb2")),
C = data.frame(x = c("xc1", "xc2"), y = c("yc1", "yc2"))
) %>%
# No need for the brackets!
dual(cbind)
## A.x A.y B.x B.y C.x C.y
## 1 xa1 ya1 xb1 yb1 xc1 yc1
## 2 xa2 ya2 xb2 yb2 xc2 yc2
culprits <- function() {
search()[[1]] %>%
ls(name = .) %>%
sapply(. %>% get() %>% object.size() %>% "/"(1e6)) %>%
data.frame(size = .) %>%
arrange(size) %>%
mutate(cumsize = cumsum(size)) %>%
arrange(desc(size)) %>%
mutate(across(everything(), ~ signif(.x, digits = 2))) %>%
mutate(unit = "Mb")
}
Example:
culprits() %>% head(3)
## size cumsize unit
## dual 0.074 0.120 Mb
## glue 0.022 0.048 Mb
## culprits 0.011 0.026 Mb
for (i in 1:2) {
BASEPATH <- pathlibr::Path$new(Sys.glob(paste(c("work", ".."), "*FibroAct", sep = "/")))
setwd(BASEPATH$show)
}
print(paste("Work path:", BASEPATH))
## [1] "Work path: ../20210502-FibroAct"
stopifnot("main.Rmd" %in% names(BASEPATH$dir))
path_to <- (function(.) Sys.glob(BASEPATH$join("../..")$join(.)$show))
out_file <- (function(.) BASEPATH$join("output")$join(.)$show)
dir.create(out_file(""), showWarnings = FALSE)
memo <- function(f) {
memoise::memoise(
f,
cache = cachem::cache_disk(BASEPATH$join("main_cache/memoized")$show)
)
}
Basic table printing.
kable <- function(.) {
kableExtra::kbl(., align = "c") %>%
kableExtra::kable_paper("hover", full_width = FALSE)
}
Options for fancy datatable printing.
options(DT.options = list(
pageLength = 16,
language = list(search = "Filter:"),
scrollX = TRUE
))
Converting a symbol or GO ID to a link.
as.link <-
function(x) {
dplyr::case_when(
# If x is like GO:003453
startsWith(x, "GO:") ~ paste0(
"<a target='_blank'",
"href='", "http://amigo.geneontology.org/amigo/term/", x, "'",
">", x, "</a>"
),
# Otherwise try this
TRUE ~ paste0(
"<a target='_blank'",
"href='", "http://www.informatics.jax.org/marker/summary?nomen=", x, "'",
">", x, "</a>"
)
)
}
Example
data.frame(x = c("GO:00345", "Lama")) %>%
mutate(link = as.link(x)) %>%
DT::datatable(width = "100%")
Printing a sequence of tables
kable_all <- function(tt) {
for (t in tt) {
t %>%
table() %>%
t() %>%
kable() %>%
print()
}
}
# with results='asis'
list(
a = c(c(1:5), c(2:5), c(3:5)),
b = c(c(1:6), c(2:6), c(3:6)),
b = c(c(1:7), c(2:7), c(3:7)),
b = c(c(1:9), c(2:9), c(3:9))
) %>% kable_all()
| 1 | 2 | 3 | 4 | 5 |
|---|---|---|---|---|
| 1 | 2 | 3 | 3 | 3 |
| 1 | 2 | 3 | 4 | 5 | 6 |
|---|---|---|---|---|---|
| 1 | 2 | 3 | 3 | 3 | 3 |
| 1 | 2 | 3 | 4 | 5 | 6 | 7 |
|---|---|---|---|---|---|---|
| 1 | 2 | 3 | 3 | 3 | 3 | 3 |
| 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 |
|---|---|---|---|---|---|---|---|---|
| 1 | 2 | 3 | 3 | 3 | 3 | 3 | 3 | 3 |
ggplot2::theme_set(theme_light(base_size = 15))
gglast <- function(what) {
p <- ggplot2::last_plot()
q <- p$mapping[[deparse(substitute(what))]]
rlang::eval_tidy(q, data = p$data)
}
# example: scale_fill_manual(values = color_map, breaks = gglast(fill))
# Converts an annotation column (e.g. sce$group)
# to a named vector of colors (group -> color)
# List of palettes: grDevices::hcl.pals()
# http://colorspace.r-forge.r-project.org/reference/hcl_palettes.html
ramp <- function(anno, palette = "Temps") {
anno %>%
as.character() %>%
unique() %>%
c(NA) %>%
data.frame(
item = .,
# https://developer.r-project.org/Blog/public/2019/04/01/hcl-based-color-palettes-in-grdevices/
color = length(.) %>% colorRampPalette(grDevices::hcl.colors(n = ., palette = palette))(.)
) %>%
tidyr::drop_na() %>%
pull(color, item)
}
Example:
ramp(c("Grp1", "Grp2", "Grp1"), palette = "Dark 3") %>%
t() %>%
kable()
| Grp1 | Grp2 |
|---|---|
| #E16A86 | #50A315 |
RGB <- function(arr, alpha = 1) {
stopifnot(ncol(arr) == 3)
arr %>%
as.data.frame() %>%
`/`(255) %>%
mutate(alpha = alpha) %>%
mutate(colors = apply(., 1, \(x) rgb(x[1], x[2], x[3], alpha = x[["alpha"]]))) %>%
pull(colors)
}
Example:
(1:7) %>%
percent_rank() %>%
(colorRamp(c("blue", "red"))) %>%
RGB(alpha = 0.5) %>%
t() %>%
kable()
| #0000FF80 | #2B00D580 | #5500AA80 | #80008080 | #AA005580 | #D5002A80 | #FF000080 |
Colors for consistency; may be overwritten below.
colors <- unlist(
c(
list(
`374_VLMC` = "aquamarine3",
`375_VLMC` = "chartreuse4",
`376_VLMC` = "chartreuse3",
`FB1` = "cornflowerblue",
`FB2` = "blue",
`Ctrl` = "chartreuse4",
`EAE` = "coral2",
`VLMC1` = "chartreuse1",
`VLMC3` = "coral3",
`VLMC4` = "coral4",
`healthy` = "green",
`EAE1` = "coral1",
`EAE2` = "coral3",
`healthy1` = "aquamarine",
`healthy2` = "chartreuse3",
`healthy3` = "chartreuse4",
`positive` = "Red4",
`negative` = "Steelblue1"
),
ramp(c("G1.S", "S", "G2", "G2.M", "M.G1"), palette = "Dark 3")
)
)
colors %>%
unlist() %>%
data.frame(c = .) %>%
mutate(item = rownames(.)) %>%
ggplot(aes(y = item, x = 1)) +
geom_tile(fill = colors, stat = "identity") +
scale_y_discrete(limits = rev(names(colors))) +
theme(
axis.ticks.x = element_blank(),
axis.text.x = element_blank(),
axis.title = element_blank()
)
## now dyn.load("/home/ra/R/x86_64-pc-linux-gnu-library/4.1/farver/libs/farver.so") ...

Zoomable figures.
///*
$(document).ready(
function() {
$("img.zoomable").on("click", function(evt) {
if (!evt.originalEvent.shiftKey && evt.originalEvent.altKey) {
var el = $(this).clone();
el.attr("width", "2048px");
window.open().document.write(el.wrap('<p>').parent().html());
return false;
}
});
}
);
//*/
Alt+click on a figure to zoom:
list(
randu %>% ggplot(aes(x = x, y = y)) +
geom_point() +
ggtitle("Zoomable 1"),
randu %>% ggplot(aes(x = x, y = z)) +
geom_point() +
ggtitle("Zoomable 2")
)


Click-scrollable figures (like slides):
$(document).ready(
function() {
$("img.scrollable").css({
'position': 'absolute', 'top': 0, 'left': 0, 'z-index': 100000,
});
var parent = $("img.scrollable").parent();
parent.css({'position': 'relative'});
parent.find('img:first').css({'position': 'relative'});
// associate a 'history' stack with each parent
parent.each(function(i, el) { $(el).data("stack", new Array()); })
$("img.scrollable").on("click", function(evt) {
if (evt.originalEvent.altKey) return true;
if (!evt.originalEvent.shiftKey) {
var last = $(this);
last.parent().data("stack").push(last);
if (last) last.css({'z-index': parseInt(last.css('z-index'), 10) - 1});
} else {
var last = $(this).parent().data("stack").pop();
if (last) last.css({'z-index': parseInt(last.css('z-index'), 10) + 1});
}
evt.stopPropagation();
return false;
});
}
);
Click a figure to slide:
list(
randu %>% ggplot(aes(x = x, y = y)) +
geom_point() +
ggtitle("Slide 1"),
randu %>% ggplot(aes(x = x, y = z)) +
geom_point() +
ggtitle("Slide 2")
)


# Use first column as index
by_col1 <- (function(.) tibble::column_to_rownames(., colnames(.)[1]))
# Use index as new column `name`
ind2col <- (function(., name) tibble::rownames_to_column(., var = name))
take_rows <- function(df, rows = NULL) {
if (!is.null(rows)) {
return(df[rows[rows %in% rownames(df)], , drop = FALSE])
} else {
return(df)
}
}
# For reading wide tables
Sys.setenv("VROOM_CONNECTION_SIZE" = 2 ** 19)
from_tsv <- function(filename, sep = "\t") {
filename %>%
vroom::vroom(del = sep, progress = FALSE) %>%
suppressMessages() %>%
by_col1()
}
# Writes dataframe to tab-separated file
# Precede by `ind2col("name for index")` to write the row names
to_tsv <- function(df, fn, sep = "\t") {
vroom::vroom_write(df, out_file(fn), delim = sep)
}
# A small random sub-dataframe (intended for genes x samples)
shample <- function(df) {
df[
sample(rownames(df), min(nrow(.), 333)),
sample(colnames(df), min(ncol(.), 33))
]
}
# Constructs a `SingleCellExperiment` checking consistency of sample names
make_sce <- function(counts, coldata, dealias = TRUE) {
# Check that samples in counts = genes x samples
# are ordered as in coldata = samples x metafields
stopifnot(assertthat::are_equal(colnames(counts), rownames(coldata)))
if (dealias) {
# TODO: get a list of aliases
gene_alias = data.frame(
old = c("1810011O10Rik", "F830016B08Rik", "A730017C20Rik"),
new = c("Tcim", "Ifgga4", "Minar2")
)
# Check that the "old -> new" mapping is well-defined
stopifnot(!any(duplicated(gene_alias$old)))
# Note: we are not checking whether rownames(counts) is unique
new <-
rownames(counts) %>%
data.frame(old = .) %>%
# For all joins, rows will be duplicated if one or more rows in x matches multiple rows in y
left_join(y = gene_alias, by = "old") %>%
# Coalesce
mutate(new = if_else(!is.na(new), new, old)) %>%
pull(new, old) %>%
# "'names(x)' must be NULL or a character vector with no attributes"
as.character()
# Most gene names should be still the same
stopifnot(mean(new == rownames(counts)) > 0.9)
counts %<>%
magrittr::set_rownames(new)
}
SingleCellExperiment::SingleCellExperiment(
assays = list(counts = counts), colData = coldata
)
}
mock_sce <- function(genes, ncells) {
sce <- scater::mockSCE(ngenes = length(genes), ncells = ncells)
sce <- make_sce(
sce@assays@data$counts %>% as.data.frame(row.names = genes),
sce@colData %>% as.data.frame() %>% rename(celltype = Mutation_Status)
)
sce
}
drop_empty <- function(sce) {
sce[, colSums(abs(sce@assays@data$counts)) != 0]
}
# Normalizes samplewise to sum(expr) = 1
# Consider using sce %>% scater::logNormCounts() %>% ...
norm1 <- function(sce) {
sce@assays@data$counts %<>%
t() %>%
{
(.) / (rowSums(.))
} %>%
t()
sce
}
# Example
mock_sce(genes = LETTERS[1:10], ncells = 3) %>%
take_rows(LETTERS[1:5]) %>%
drop_empty() %>%
norm1() %>%
scater::logNormCounts() %>%
scater::aggregateAcrossFeatures(ids = c("V", "C", "C", "C", "V"), average = TRUE)
## class: SingleCellExperiment
## dim: 2 3
## metadata(0):
## assays(1): counts
## rownames(2): C V
## rowData names(0):
## colnames(3): Cell_001 Cell_002 Cell_003
## colData names(4): celltype Cell_Cycle Treatment sizeFactor
## reducedDimNames(0):
## mainExpName: NULL
## altExpNames(0):
taxid <- list(mm = 10090, hs = 9606)
| mm | hs |
|---|---|
| 10090 | 9606 |
symbol2geneid <- function(genes) {
data.frame(symbol = genes) %>%
merge(
y = org.Mm.eg.db::org.Mm.egSYMBOL2EG %>% as.data.frame(),
all.x = TRUE,
by = "symbol",
sort = FALSE # !
) %>%
pull(gene_id, symbol) %>%
`[`(genes) # restore the original order
}
c("Jun", "Junb", "Junc") %>%
data.frame(symbol = .) %>%
mutate(gene_id = symbol2geneid(symbol)) %>%
kable()
| symbol | gene_id |
|---|---|
| Jun | 16476 |
| Junb | 16477 |
| Junc | NA |
omnipath.in <-
path_to("*/*/*/omnipath_in.csv.gz") %>%
from_tsv() %>%
select(
source, source_genesymbol, target, target_genesymbol,
organism,
is_inhibition, is_stimulation
)
Preview:
omnipath.tf <-
path_to("*/*/*/omnipath_tf.csv.gz") %>%
from_tsv() %>%
select(
source, source_genesymbol, target, target_genesymbol,
organism,
is_inhibition, is_stimulation
)
Preview:
omnipath.lr <-
path_to("*/*/*/omnipath_lr.csv.gz") %>%
from_tsv()
Preview:
We use Omnipath to annotate genes with their role using the following function.
# For a vector of gene symbols returns
# a vector containing the role of each symbol
symbol2role <- function(symbol, organism = taxid$mm) {
f <- function(df) {
df %>% filter(organism == organism)
}
data.frame(
TF = if_else(symbol %in% f(omnipath.tf)$source_genesymbol, "TF", ""),
Tgt = if_else(symbol %in% f(omnipath.tf)$target_genesymbol, "Tgt", ""),
Lig = if_else(symbol %in% f(omnipath.lr)$source_genesymbol, "Lig", ""),
Rec = if_else(symbol %in% f(omnipath.lr)$target_genesymbol, "Rec", "")
) %>%
apply(MARGIN = 1, FUN = function(row) {
if (any(nzchar(row))) {
paste(row[nzchar(row)], collapse = "/")
} else {
"--"
}
})
}
For example:
data.frame(symbol = c("Jun", "Notch1", "Xyz")) %>%
mutate(role = symbol2role(symbol)) %>%
kable()
| symbol | role |
|---|---|
| Jun | TF/Tgt |
| Notch1 | Tgt/Lig/Rec |
| Xyz | – |
The following provides a gene name (description) and a longer explanation (summary).
gene.summary <-
taxid %>%
lapply(
function(i) {
path_to(paste0("*/NCBI/gene/*/", names(which(. == i)), ".tsv.gz")) %>%
from_tsv() %>%
ind2col("gene_id")
}
)
Preview:
The following function sets up a gene id/symbol homology table.
# homology(taxid$mm, taxid$hs) %>% head
# gives a dataframe like
# gene_id.y gene_id.x symbol.x symbol.y
# 1 1 117586 A1bg A1BG
# 2 2 232345 A2m A2M
# 3 9 17961 Nat2 NAT1
# 4 10 17960 Nat1 NAT2
# 5 12 16625 Serpina3c SERPINA3
# 6 13 67758 Aadac AADAC
homology <- function(taxid.x, taxid.y) {
# Assume that `gene_id` is unique
symbols <- rbind(
org.Mm.eg.db::org.Mm.egSYMBOL2EG %>% as.data.frame(),
org.Hs.eg.db::org.Hs.egSYMBOL2EG %>% as.data.frame()
)
path_to("*/MGI/*/vertebrate_homology.tsv*") %>%
from_tsv() %>%
select(gene_id = EntrezGene_ID, taxid = NCBI_Taxon_ID, hg = HomoloGene_ID) %>%
filter(taxid %in% c(taxid.x, taxid.y)) %>%
mutate(taxid = if_else(taxid == taxid.x, "x", "y")) %>%
stats::reshape(direction = "wide", idvar = "hg", timevar = "taxid") %>%
# "multiple rows match for taxid"
suppressWarnings() %>%
select(gene_id.x, gene_id.y) %>%
merge(y = symbols %>% select(gene_id.x = gene_id, symbol.x = symbol), by = "gene_id.x") %>%
merge(y = symbols %>% select(gene_id.y = gene_id, symbol.y = symbol), by = "gene_id.y")
}
For example:
homology(taxid$mm, taxid$hs) %>%
head(11) %>%
{
.[, order(colnames(.))]
} %>%
mutate(seems.resonable = (tolower(symbol.x) == tolower(symbol.y))) %>%
rbind("...") %>%
kable()
| gene_id.x | gene_id.y | symbol.x | symbol.y | seems.resonable |
|---|---|---|---|---|
| 117586 | 1 | A1bg | A1BG | TRUE |
| 232345 | 2 | A2m | A2M | TRUE |
| 17961 | 9 | Nat2 | NAT1 | FALSE |
| 17960 | 10 | Nat1 | NAT2 | FALSE |
| 16625 | 12 | Serpina3c | SERPINA3 | FALSE |
| 67758 | 13 | Aadac | AADAC | TRUE |
| 227290 | 14 | Aamp | AAMP | TRUE |
| 11298 | 15 | Aanat | AANAT | TRUE |
| 234734 | 16 | Aars | AARS1 | FALSE |
| 268860 | 18 | Abat | ABAT | TRUE |
| 11303 | 19 | Abca1 | ABCA1 | TRUE |
| … | … | … | … | … |
We use that to add description/summary to mouse genes by homology.
gene.summary$mm %<>%
merge(
x = .,
y = merge(
x = gene.summary$hs %>%
select(gene_id, summ = summary, desc = description) %>%
mutate(desc = if_else(is.na(desc), desc, paste("By homology:", desc))) %>%
mutate(summ = if_else(is.na(summ), summ, paste("By homology:", summ))),
y = path_to("*/MGI/*/vertebrate_homology.tsv*") %>%
from_tsv() %>%
filter(NCBI_Taxon_ID %in% taxid) %>%
select(gene_id = EntrezGene_ID, taxid = NCBI_Taxon_ID, hg = HomoloGene_ID) %>%
stats::reshape(direction = "wide", idvar = "hg", timevar = "taxid") %>%
select(gene_id = gene_id.9606, alt_gene_id = gene_id.10090),
by = "gene_id",
all = FALSE
) %>%
select(gene_id = alt_gene_id, alt_desc = desc, alt_summ = summ),
by = "gene_id",
all.x = TRUE,
all.y = FALSE,
sort = FALSE
) %>%
mutate(summary = if_else(!is.na(summary), summary, alt_summ)) %>%
mutate(description = if_else(!is.na(description), description, alt_desc)) %>%
select(-alt_desc, -alt_summ)
## Warning in reshapeWide(data, idvar = idvar, timevar = timevar, varying =
## varying, : multiple rows match for taxid=10090: first taken
## Warning in reshapeWide(data, idvar = idvar, timevar = timevar, varying =
## varying, : multiple rows match for taxid=9606: first taken
Preview:
goi <-
list(
neuroinflammation = list(
name = "Neuroinflammation",
genes = unique(c(
# "Early response" transcription factors: Fos, Fosb, Junb
# From 2018-Vanlandewijck-He-...-Betsholtz:
# "A molecular atlas of cell types and zonation in the brain vasculature"
c("Fos", "Fosb", "Junb", "Egr1"), # Jun is below
# Wikipathways Neuroinflammation:
# https://www.wikipathways.org/instance/WP4919_r111814
# Downloaded from
# http://www.gsea-msigdb.org/gsea/msigdb/cards/WP_NEUROINFLAMMATION.html
c("Ascc1", "Chuk", "Fos", "Jun", "Mapk14", "Mapk8", "mt-Co1", "mt-Co2", "Mtor", "Nfkbia", "Nos2", "Rela", "Tlr4")
))
),
# From 2018-Vanlandewijck-He-...-Betsholtz
# Pericyte markers:
# Pdgfrb, Cspg4, and Des
# SMC:
# Acta2 and Tagln
# Fibroblast markers:
# Pdgfra, Lum and Dcn
pvf = list(
name = "PVF",
genes = c(
# From 2021-Manberg-...-Lewandowski, p.643
"Spp1", "Col6a1", "Col1a1", "Mmp2",
# From 2018-Vanlandewijck-He-...-Betsholtz
"Pdgfra", "Lum", "Dcn"
)
),
# Collagen
col = list(
name = "Collagen",
genes = c(
"Col10a1", "Col11a1", "Col11a2", "Col12a1", "Col13a1", "Col14a1",
"Col15a1", "Col16a1", "Col17a1", "Col18a1", "Col19a1", "Col1a1",
"Col1a2", "Col20a1", "Col22a1", "Col23a1", "Col24a1", "Col25a1",
"Col26a1", "Col27a1", "Col28a1", "Col2a1", "Col3a1", "Col4a1",
"Col4a2", "Col4a3", "Col4a3bp", "Col4a4", "Col4a5", "Col4a6",
"Col5a1", "Col5a2", "Col5a3", "Col6a1", "Col6a2", "Col6a3",
"Col6a4", "Col6a5", "Col6a6", "Col7a1", "Col8a1", "Col8a2",
"Col9a1", "Col9a2", "Col9a3", "Colec10", "Colec11", "Colec12",
"Colgalt1", "Colgalt2", "Colq"
)
# Obtained using
# sce_dm %>% rownames() %>% { (.)[stringr::str_detect(., "^Col")] } %>% sort()
),
# Mitochondrial
mt = list(
name = "Mitochondrial",
genes = c(
"mt-Atp6", "mt-Atp8", "mt-Co1", "mt-Co2", "mt-Co3", "mt-Cytb",
"mt-Nd1", "mt-Nd2", "mt-Nd3", "mt-Nd4", "mt-Nd4l", "mt-Nd5", "mt-Nd6"
)
),
# Obtained using (e.g.)
# sce_dm %>% rownames() %>% { (.)[stringr::str_detect(., "^mt-")] } %>% sort()
air = list(
name = "Aerobic respiration",
genes = c(
# GO:1903715 (regulation of aerobic respiration)
# http://www.informatics.jax.org/vocab/gene_ontology/GO:1903715
c("Actn3", "Akt1", "Bnip3", "Cbfa2t3", "Hif1a", "Ide", "Nop53", "Shmt2", "Trpv4", "Vcp"),
# GO:0009060 (aerobic respiration) minus GO:1903715 and minus 'mt-Co3'
# http://www.informatics.jax.org/vocab/gene_ontology/GO:0009060
c("Aco1", "Aco2", "Adsl", "Afg1l", "Atp5f1d", "Bloc1s1", "Cat", "Cox10", "Cox4i1", "Cox4i2", "Cox5a", "Cox5b", "Cox6a1", "Cox6a2", "Cox7c", "Cs", "Csl", "Cycs", "Cyct", "Dhtkd1", "Dlat", "Dlst", "Fh", "Fxn", "Idh1", "Idh2", "Idh3a", "Idh3b", "Idh3g", "Ireb2", "Mdh1", "Mdh1b", "Mdh2", "Mtco1", "Mtfr1", "Mtfr1l", "Mtfr2", "Mtnd1", "Mtnd4", "Mup1", "Mup11", "Mup2", "Mup3", "Mup4", "Mup5", "Mup6", "Ndufs4", "Ndufs7", "Ndufs8", "Ogdh", "Ogdhl", "Oxa1l", "Pank2", "Pdha1", "Pdha2", "Pdhb", "Proca1", "Q8BPC6", "Sdha", "Sdhaf2", "Sdhb", "Sdhc", "Sdhd", "Sirt3", "Sucla2", "Suclg1", "Suclg2", "Ucn", "Uqcr10", "Uqcrb", "Uqcrh")
)
),
fib_mig = list(
name = "Reg. fib. mig.",
genes = c(
# GO:0010762 (regulation of fibroblast migration)
# http://www.informatics.jax.org/vocab/gene_ontology/GO:0010762
c("Acta2", "Actr3", "Adipor2", "Ager", "Akap12", "Akt1", "Appl1", "Appl2", "Arhgap4", "Bag4", "Braf", "Cln3", "Coro1c", "Cygb", "Ddr2", "Dmtn", "Fer", "Fgf2", "Fgfr1", "Gna12", "Has1", "Hyal2", "Itgb1", "Itgb1bp1", "Itgb3", "MACIR", "Mmp1a", "Mta2", "Pak1", "Pak3", "Prkce", "Prr5l", "Ptk2", "Rac1", "Rcc2", "Rffl", "Sdc4", "Slc8a1", "Tgfb1", "Thbs1", "Tsc2", "Uts2", "Wdpcp")
)
),
glycolysis = list(
name = "Anaerobic glycolysis",
genes = c(
# GO:0006096 (glycolytic process / anaerobic glycolysis)
# http://www.informatics.jax.org/vocab/gene_ontology/GO:0006096
c("Actn3", "Adpgk", "Aldoa", "Aldoart1", "Aldoart2", "Aldob", "Aldoc", "App", "Bpgm", "Cbfa2t3", "Ddit4", "Dhtkd1", "Eif6", "Eno1", "Eno2", "Eno3", "Eno4", "Entpd5", "Ep300", "Esrrb", "Fbp1", "Foxk1", "Foxk2", "Gale", "Galk1", "Galt", "Gapdh", "Gapdhs", "Gck", "Git1", "Gm10358", "Gm11214", "Gm12117", "Gm15294", "Gm3839", "Gpd1", "Gpi", "Hdac4", "Hif1a", "Hk1", "Hk2", "Hk3", "Hkdc1", "Htr2a", "Ier3", "Ifng", "Igf1", "Il3", "Ins2", "Insr", "Jmjd8", "Khk", "Mif", "Mlxipl", "Mpi", "Mtch2", "Myc", "Myog", "Ncor1", "Nupr1", "Ogdh", "Ogt", "P2rx7", "Pfkfb2", "Pfkl", "Pfkm", "Pfkp", "Pgam1", "Pgam2", "Pgk1", "Pgk2", "Pklr", "Pkm", "Ppara", "Ppargc1a", "Prkaa1", "Prkaa2", "Prkag2", "Prkag3", "Prxl2c", "Psen1", "Sirt6", "Slc2a6", "Slc4a1", "Slc4a4", "Stat3", "Tigar", "Tkfc", "Tpi1", "Trex1", "Zbtb20", "Zbtb7a")
)
),
# https://reactome.org/PathwayBrowser/#/R-MMU-877300&SEL=R-MMU-873829&DTAB=MT
# Motivated by https://www.nature.com/articles/s41593-020-00770-9
ifng_signaling = list(
name = "Interferon gamma signaling",
genes = c("Ifngr1", "Ifngr2", "Jak1", "Jak2", "Ifng")
),
eae_common_up = list(
name = "Consistently up in EAE",
genes = c("Wif1", "Flrt2", "Tcim", "Ifi47", "Aqp1", "Npy1r", "Serpina3g", "Tbc1d7", "Mical2", "Kng1", "Plac8", "Rai14", "Cd74", "Adam12", "Tjp2", "Cd44", "H2-Aa", "Cldn11", "Ctxn1")
)
)
# Use a function because the list may change
get_goi_genes <- function() {
goi %>%
lapply(as.data.frame) %>%
dual(base::rbind) %>%
pull(genes) %>%
unique()
}
get_goi_genes()[1:7] %>%
c("...") %>%
t() %>%
kable()
| Fos | Fosb | Junb | Egr1 | Ascc1 | Chuk | Jun | … |
# Return a vector `goi_set` indicating the gene set for each `symbol`
symbol2goiset <- function(genes) {
df <- data.frame(symbol = genes, goi_set = "Other")
for (goi in goi) {
df %<>% mutate(goi_set = if_else(symbol %in% goi$genes, goi$name, goi_set))
}
(df %>% pull(goi_set, symbol))[genes]
}
show_gene_info_table <- function(df) {
df$symbols # need this column
df %>%
mutate(role = symbol2role(symbol)) %>%
mutate(goi_set = symbol2goiset(symbol)) %>%
mutate(gene_id = symbol2geneid(symbol)) %>%
merge(gene.summary$mm, all.x = TRUE, by = "gene_id", sort = FALSE) %>%
mutate(description = if_else(is.na(summary), description, paste0("<details>", "<summary>", description, "</summary>", summary, "</details>"))) %>%
mutate(description = paste0("<span style='font-size:70%'>", description, "</span>")) %>%
select(-summary) %>%
mutate(symbol = as.link(symbol)) %>%
DT::datatable(rownames = NULL, escape = FALSE, width = "100%")
}
get_goi_genes() %>%
sample(size = 3) %>%
data.frame(symbol = .) %>%
rbind("...") %>%
show_gene_info_table()
# TODO: normalize with edgeR?
# Plots the PCA, showing the expression of genes
show_goi_pca <- function(sce, genes, group.by = "celltype", colors = ramp(as.character(sce@colData[[group.by]]))) {
cbind(
# Reduced dim coordinates
sce@int_colData$reducedDims$PCA %>%
as.data.frame(row.names = colnames(sce)) %>%
select(x = PC1, y = PC2),
# Metadata
sce@colData %>%
as.data.frame(row.names = colnames(sce)) %>%
select(celltype = all_of(group.by)),
# Expression data for genes-of-interest
sce@assays@data$counts %>%
log1p() %>%
`[`(genes[genes %in% rownames(.)], ) %>%
t()
) %>%
# Make long table
reshape2::melt(
# Keep as separate columns:
id.vars = c("x", "y", "celltype"),
# These columns will be stacked, in a column with this name:
measure.vars = genes, variable.name = "symbol"
) %>%
# Plot
ggplot(aes(x = x, y = y, color = celltype)) +
geom_point(aes(size = value), alpha = 0.4) +
scale_color_manual(values = colors, breaks = gglast(colour)) +
# theme_void() +
theme(
axis.title = element_blank(),
axis.text = element_blank(),
) +
labs(color = group.by, size = "Expression") +
facet_wrap(vars(symbol))
}
get_goi_genes() %>%
mock_sce(genes = ., ncells = 77) %>%
scater::logNormCounts() %>%
scater::runPCA() %>%
show_goi_pca(goi$pvf$genes, group.by = "Cell_Cycle", colors = list(`G0` = "Red", `G1` = "Blue", `G2M` = "Orange", `S` = "DarkGreen", `Dead` = "Black"))
## Warning in (function (A, nv = 5, nu = nv, maxit = 1000, work = nv + 7, reorth =
## TRUE, : You're computing too large a percentage of total singular values, use a
## standard svd instead.

# Aggregates genes by `group.by` for each geneset from `goi`
goi_agg_by_group <- function(sce, group.by = "celltype") {
goi %>%
lapply(function(goi) {
(dual(rbind))(as.list(group.by) %>% lapply(function(group.by) {
if (any(goi$genes %in% rownames(sce))) {
cbind(
# Metadata
sce@colData %>%
as.data.frame(row.names = colnames(sce)) %>%
select(subgroup = all_of(group.by)),
# Expression data for genes-of-interest
sce %>%
scater::logNormCounts() %>%
SingleCellExperiment::logcounts() %>%
take_rows(goi$genes) %>%
t()
) %>%
# Aggregate by sample
group_by(subgroup) %>%
summarise(across(everything(), mean)) %>%
# Make long table
reshape2::melt(
# Keep as separate columns:
id.vars = c("subgroup"),
# Other columns will be stacked, in a column with this name:
variable.name = "symbol"
# A column `value` contains their values
) %>%
mutate(group = group.by) %>%
mutate(goi_set = goi$name)
}
}))
}) %>%
# Relabel the elements of the list
dual(rbind) %>%
split(.$goi_set)
# cf.
# https://stackoverflow.com/questions/33775239/emulate-split-with-dplyr-group-by-return-a-list-of-data-frames
}
get_goi_genes() %>%
mock_sce(genes = ., ncells = 77) %>%
goi_agg_by_group(group.by = "Treatment") %>%
lapply(head)
## $`Aerobic respiration`
## subgroup symbol value group goi_set
## air.1 treat1 Actn3 4.364586 Treatment Aerobic respiration
## air.2 treat2 Actn3 3.399285 Treatment Aerobic respiration
## air.3 treat1 Akt1 5.422383 Treatment Aerobic respiration
## air.4 treat2 Akt1 4.679099 Treatment Aerobic respiration
## air.5 treat1 Bnip3 6.777997 Treatment Aerobic respiration
## air.6 treat2 Bnip3 6.168800 Treatment Aerobic respiration
##
## $`Anaerobic glycolysis`
## subgroup symbol value group goi_set
## glycolysis.1 treat1 Actn3 4.3645856 Treatment Anaerobic glycolysis
## glycolysis.2 treat2 Actn3 3.3992853 Treatment Anaerobic glycolysis
## glycolysis.3 treat1 Adpgk 8.0364236 Treatment Anaerobic glycolysis
## glycolysis.4 treat2 Adpgk 7.7672360 Treatment Anaerobic glycolysis
## glycolysis.5 treat1 Aldoa 0.5537940 Treatment Anaerobic glycolysis
## glycolysis.6 treat2 Aldoa 0.6600854 Treatment Anaerobic glycolysis
##
## $Collagen
## subgroup symbol value group goi_set
## col.1 treat1 Col10a1 4.218582 Treatment Collagen
## col.2 treat2 Col10a1 4.081505 Treatment Collagen
## col.3 treat1 Col11a1 5.622591 Treatment Collagen
## col.4 treat2 Col11a1 5.426353 Treatment Collagen
## col.5 treat1 Col11a2 2.229198 Treatment Collagen
## col.6 treat2 Col11a2 2.163096 Treatment Collagen
##
## $`Consistently up in EAE`
## subgroup symbol value group goi_set
## eae_common_up.1 treat1 Wif1 5.8398679 Treatment Consistently up in EAE
## eae_common_up.2 treat2 Wif1 5.6460252 Treatment Consistently up in EAE
## eae_common_up.3 treat1 Flrt2 0.6817727 Treatment Consistently up in EAE
## eae_common_up.4 treat2 Flrt2 0.8207945 Treatment Consistently up in EAE
## eae_common_up.5 treat1 Tcim 4.9458467 Treatment Consistently up in EAE
## eae_common_up.6 treat2 Tcim 5.8038934 Treatment Consistently up in EAE
##
## $`Interferon gamma signaling`
## subgroup symbol value group goi_set
## ifng_signaling.1 treat1 Ifngr1 0.7538559 Treatment Interferon gamma signaling
## ifng_signaling.2 treat2 Ifngr1 0.6658387 Treatment Interferon gamma signaling
## ifng_signaling.3 treat1 Ifngr2 6.5647232 Treatment Interferon gamma signaling
## ifng_signaling.4 treat2 Ifngr2 5.8133288 Treatment Interferon gamma signaling
## ifng_signaling.5 treat1 Jak1 1.1453589 Treatment Interferon gamma signaling
## ifng_signaling.6 treat2 Jak1 1.4627156 Treatment Interferon gamma signaling
##
## $Mitochondrial
## subgroup symbol value group goi_set
## mt.1 treat1 mt-Atp6 3.909627 Treatment Mitochondrial
## mt.2 treat2 mt-Atp6 3.818793 Treatment Mitochondrial
## mt.3 treat1 mt-Atp8 1.770936 Treatment Mitochondrial
## mt.4 treat2 mt-Atp8 1.905648 Treatment Mitochondrial
## mt.5 treat1 mt-Co1 6.126852 Treatment Mitochondrial
## mt.6 treat2 mt-Co1 5.688020 Treatment Mitochondrial
##
## $Neuroinflammation
## subgroup symbol value group goi_set
## neuroinflammation.1 treat1 Fos 6.4073029 Treatment Neuroinflammation
## neuroinflammation.2 treat2 Fos 6.9742796 Treatment Neuroinflammation
## neuroinflammation.3 treat1 Fosb 1.9497291 Treatment Neuroinflammation
## neuroinflammation.4 treat2 Fosb 1.6649219 Treatment Neuroinflammation
## neuroinflammation.5 treat1 Junb 1.3837928 Treatment Neuroinflammation
## neuroinflammation.6 treat2 Junb 0.5679997 Treatment Neuroinflammation
##
## $PVF
## subgroup symbol value group goi_set
## pvf.1 treat1 Spp1 6.063762 Treatment PVF
## pvf.2 treat2 Spp1 5.454997 Treatment PVF
## pvf.3 treat1 Col6a1 9.104895 Treatment PVF
## pvf.4 treat2 Col6a1 9.343041 Treatment PVF
## pvf.5 treat1 Col1a1 2.742811 Treatment PVF
## pvf.6 treat2 Col1a1 2.484333 Treatment PVF
##
## $`Reg. fib. mig.`
## subgroup symbol value group goi_set
## fib_mig.1 treat1 Acta2 3.426720 Treatment Reg. fib. mig.
## fib_mig.2 treat2 Acta2 3.410205 Treatment Reg. fib. mig.
## fib_mig.3 treat1 Actr3 4.946648 Treatment Reg. fib. mig.
## fib_mig.4 treat2 Actr3 4.981941 Treatment Reg. fib. mig.
## fib_mig.5 treat1 Adipor2 7.616679 Treatment Reg. fib. mig.
## fib_mig.6 treat2 Adipor2 8.006051 Treatment Reg. fib. mig.
# For each cell, aggregate each gene set
# Returns a sample x geneset table (with additional metadata columns)
goi_agg_by_genes <- function(sce) {
cbind(
# Sample metadata
sce@colData %>%
as.data.frame() %>%
select(any_of(c("cluster", "celltype", "group", "subgroup", "Group"))),
# Mean expression for genes-of-interest
lapply(goi, function(goi) {
list() %>% within({
assign(
goi$name,
sce@assays@data$counts %>%
log1p() %>%
take_rows(goi$genes) %>%
colMeans()
)
})
}),
# Library size
data.frame(lib.size = sce@assays@data$counts %>% colSums()),
# Reduced dim coordinates
sce %>%
scater::logNormCounts() %>%
scater::runPCA() %>%
SingleCellExperiment::reducedDim("PCA") %>%
as.data.frame() %>%
select(x = PC1, y = PC2, z = PC3)
)
}
get_goi_genes() %>%
mock_sce(genes = ., ncells = 77) %>%
goi_agg_by_genes() %>%
head() %>%
DT::datatable(width = "100%", options = list(scrollX = TRUE))
## Warning in (function (A, nv = 5, nu = nv, maxit = 1000, work = nv + 7, reorth =
## TRUE, : You're computing too large a percentage of total singular values, use a
## standard svd instead.
This is a set of differential genes in kidney fibroblast activation from Deng et al., 2021.
fb_genes_si7 <- path_to("*/Deng-Wei-2021/*/SI7.*.gz") %>% from_tsv()
# fb_genes_si8 <- path_to("*/Deng-Wei-2021/*/SI8.*.gz") %>% from_tsv()
show_tbl <- function(tbl) {
tbl %<>%
ind2col("symbol") %>%
mutate(symbol = as.link(symbol)) %>%
select(symbol, logFC)
tbl %>%
DT::datatable(escape = FALSE) %>%
DT::formatSignif(colnames(tbl %>% select(-symbol)), digits = 3)
}
The list of genes is from Dorrier-Aran-…-Daneman, 2021, Extended Data Fig. 4.
if (!("c0" %in% names(goi))) {
goi %<>%
c(
list(
`0` = c("Col1a2", "Bgn", "Col3a1", "Col8a1", "Igf1", "Ifitm1", "Col1a1", "Cfh", "Sparc", "B2m"),
`1` = c("Apod", "Trf", "Dcn", "Gsn", "Gpc3", "Tgfbi", "Ccl11", "Smoc2", "Lum", "Cst3"),
`2` = c("Lgals1", "Timp1", "Rpl41", "Rps18", "Rps12", "Col4a1", "Cxcl10"),
`3` = c("Junb", "Fosb", "Fos", "Ptn", "Igfbp2", "Klf4", "Clec3b", "Apoe", "Rbp4", "Vtn"),
`4` = c("Rbp1", "Fth1", "Aldh1a2", "Fn1", "Igfbp3", "Slc7a11", "Ptgds", "Timp3", "Igfbp5"),
`5` = c("Ptch1", "Igfbp6", "Tmsb4x", "S100a6", "Igfbp7", "Mgp", "Saa1"),
`6` = c("Itih5", "Spp1", "Klf2", "Ubb", "Jund", "Mt1"),
`7` = c("Rgs5", "Acta2", "Myl9", "Gm13889", "Tagln", "Tpm2", "Actb")
) %>%
stack() %>%
mutate(cluster = paste0("c", ind), name = paste0("Dm #", ind)) %>%
split(.$cluster) %>%
lapply(function(c) list(name = unique(c$name), genes = c$values))
)
}
heatmap <- function(sce, colors = NULL,
anno_col = NULL, anno_row = NULL,
...) {
# `colors` should be like list(`negative` = "Blue", `positive` = "Red", ...)
colors %<>% c(NULL) %>% unlist()
if (!is.null(anno_row)) {
stopifnot(assertthat::are_equal(rownames(sce), rownames(anno_row)))
}
if (!is.null(anno_col)) {
stopifnot(assertthat::are_equal(colnames(sce), rownames(anno_col)))
}
anno_col %<>%
list(
sce@colData %>%
as.data.frame() %>%
select(any_of(c("celltype", "subgroup", "Group", "cluster", "Treatment")))
) %>%
dual(cbind)
sce %>%
SingleCellExperiment::counts() %>%
log1p() %>%
{
# (.) - rowMeans(.)
# (.) / rowSums(.)
} %>%
pheatmap::pheatmap(
show_colnames = FALSE,
treeheight_row = 0,
treeheight_col = 0,
cluster_rows = FALSE,
fontsize = 6,
color = grDevices::colorRampPalette(c("black", "yellow"))(10),
annotation_col = anno_col,
annotation_row = anno_row,
annotation_colors = c(
# TODO: use blue-red for LFC
anno_row %>%
as.list() %>%
lapply(ramp),
anno_col %>%
as.list() %>%
lapply(function(x) {
if (all(x %in% names(colors))) {
colors[unique(as.character(x))] %>% unlist()
} else {
ramp(x)
}
})
),
...
)
}
show_dm_heatmap <- function(sce, anno_row = NULL, ...) {
dm_genes <-
goi[grep("c[0-9]", goi %>% names())] %>%
lapply(as.data.frame) %>%
dual(rbind) %>%
pull(name, genes)
if (!is.null(anno_row)) {
stopifnot(assertthat::are_equal(rownames(sce), rownames(anno_row)))
}
anno_row %<>%
take_rows(names(dm_genes))
sce %<>%
take_rows(names(dm_genes))
anno_row %<>%
list(data.frame(dm.cluster = dm_genes[rownames(sce)])) %>%
dual(cbind)
heatmap(sce, anno_row = anno_row, ...)
}
unlist(goi[grep("c[0-9]", goi %>% names())]) %>%
mock_sce(ncells = 77) %>%
{
(.)[, order((.)$celltype)]
} %>%
show_dm_heatmap(
colors = colors,
cluster_cols = FALSE,
anno_col = data.frame(x = ((1:ncol(.)) %% 2), row.names = colnames(.)),
anno_row = data.frame(y = ((1:nrow(.)) %% 2), row.names = rownames(.))
)

The following table shows all the “genes of interest”.
get_goi_genes() %>%
data.frame(symbol = .) %>%
show_gene_info_table()
Small dataset to run through quickly.
sce_dummy <- readRDS(path_to("*/*/sce_dummy.rds"))
# Made with:
# load_dataset$dm(size = 77) %>% keep_only_goi() %>% saveRDS(file = "sce_dummy.rds")
## class: SingleCellExperiment
## dim: 267 74
## metadata(0):
## assays(1): counts
## rownames(267): Fos Fosb ... Zbtb20 Zbtb7a
## rowData names(0):
## colnames(74): healthy2_AATCCAGCATAGTAAG EAE1_AAAGATGTCGGAAATA ...
## EAE1_CTGTTTAAGAGTCTGG EAE1_TTAGTTCTCTCTGAGA
## colData names(4): group subgroup cluster celltype
## reducedDimNames(0):
## mainExpName: NULL
## altExpNames(0):
data.frame(
x = sce_dummy@assays@data$counts %>% colSums(),
subgroup = sce_dummy$subgroup
) %>%
ggplot(aes(x = x, color = subgroup)) +
scale_color_manual(values = colors[as.character(gglast(colour))]) +
geom_density()

scater_attach <- function(sce) {
scater::addPerCellQC(
x = sce,
subsets = list(Mito = grep("mt-", rownames(sce)))
) %>%
scater::logNormCounts() %>%
scater::runPCA(name = "PCA", ncomponents = 20) %>%
scater::runTSNE(perplexity = 10, dimred = "PCA") %>%
scater::runUMAP()
}
sce_dummy %<>% scater_attach()
## Read the 74 x 20 data matrix successfully!
## OpenMP is working. 1 threads.
## Using no_dims = 2, perplexity = 10.000000, and theta = 0.500000
## Computing input similarities...
## Building tree...
## Done in 0.00 seconds (sparsity = 0.595690)!
## Learning embedding...
## Iteration 50: error is 67.728278 (50 iterations in 0.01 seconds)
## Iteration 100: error is 70.060095 (50 iterations in 0.01 seconds)
## Iteration 150: error is 66.598763 (50 iterations in 0.01 seconds)
## Iteration 200: error is 67.567400 (50 iterations in 0.01 seconds)
## Iteration 250: error is 61.618741 (50 iterations in 0.01 seconds)
## Iteration 300: error is 2.156299 (50 iterations in 0.00 seconds)
## Iteration 350: error is 1.678882 (50 iterations in 0.00 seconds)
## Iteration 400: error is 1.315480 (50 iterations in 0.01 seconds)
## Iteration 450: error is 0.960326 (50 iterations in 0.00 seconds)
## Iteration 500: error is 0.793071 (50 iterations in 0.00 seconds)
## Iteration 550: error is 0.735798 (50 iterations in 0.00 seconds)
## Iteration 600: error is 0.703731 (50 iterations in 0.01 seconds)
## Iteration 650: error is 0.690715 (50 iterations in 0.00 seconds)
## Iteration 700: error is 0.677563 (50 iterations in 0.00 seconds)
## Iteration 750: error is 0.678223 (50 iterations in 0.01 seconds)
## Iteration 800: error is 0.676888 (50 iterations in 0.00 seconds)
## Iteration 850: error is 0.675521 (50 iterations in 0.00 seconds)
## Iteration 900: error is 0.675841 (50 iterations in 0.00 seconds)
## Iteration 950: error is 0.677387 (50 iterations in 0.01 seconds)
## Iteration 1000: error is 0.678122 (50 iterations in 0.00 seconds)
## Fitting performed in 0.11 seconds.
## 09:50:43 UMAP embedding parameters a = 1.896 b = 0.8006
## 09:50:43 Read 74 rows and found 267 numeric columns
## 09:50:43 Reducing X column dimension to 50 via PCA
## 09:50:43 PCA: 50 components explained 95.41% variance
## 09:50:43 Using FNN for neighbor search, n_neighbors = 15
## now dyn.load("/home/ra/R/x86_64-pc-linux-gnu-library/4.1/FNN/libs/FNN.so") ...
## 09:50:44 Commencing smooth kNN distance calibration using 4 threads
## 09:50:45 Initializing from normalized Laplacian + noise
## now dyn.load("/home/ra/R/x86_64-pc-linux-gnu-library/4.1/RSpectra/libs/RSpectra.so") ...
## 09:50:45 Commencing optimization for 500 epochs, with 1758 positive edges
## 09:50:46 Optimization finished
sce_dummy %>%
scater::makePerCellDF() %>%
head(3) %>%
DT::datatable(width = "100%")
scater_show <- function(sce, color.by = "celltype", colors = NULL) {
items <- sce@colData[[color.by]]
colors <-
if (all(items %in% names(colors))) {
colors[unique(as.character(items))]
} else {
ramp(items, palette = "Set 2")
}
sce %>%
{
function(which_dimred) {
suppressMessages(
scater::plotReducedDim(
dimred = which_dimred,
object = (.),
colour_by = color.by
) +
scale_color_manual(values = colors) + # no need for `breaks`
labs(color = color.by) +
guides(color = guide_legend(override.aes = list(size = 2, alpha = 1))) +
theme(
axis.text = element_blank(),
axis.ticks = element_blank()
)
)
}
} %>%
{
gridExtra::arrangeGrob((.)("PCA"), (.)("TSNE"), (.)("UMAP"), nrow = 3)
}
}
sce_dummy %>%
scater_show(color.by = "subgroup", colors = colors) %>%
gridExtra::grid.arrange()

Prototype, not for direct use.
pca3d_template <- function(sce, color.by = "celltype", colors = ramp(sce@colData[[color.by]])) {
data.frame(
color_group = sce@colData[[color.by]],
lib.size = sce@assays@data$counts %>% colSums(),
SingleCellExperiment::reducedDim(sce, "PCA") %>%
as.data.frame() %>%
select(PC1, PC2, PC3)
) %>%
group_by(color_group) %>%
sample(size = 30, replace = TRUE) %>%
ungroup() %>%
# print() %>%
plotly::plot_ly() %>%
plotly::add_markers(
x = ~PC1, y = ~PC2, z = ~PC3,
marker = list(size = 2, opacity = 1),
color = ~color_group,
colors = colors
) %>%
plotly::layout(
scene = list(
xaxis = list(title = "PC1", showticklabels = FALSE),
yaxis = list(title = "PC2", showticklabels = FALSE),
zaxis = list(title = "PC3", showticklabels = FALSE)
)
)
}
sce_dummy %>% pca3d_template(color.by = "subgroup", colors = colors)
default_cyclic_genes <-
# Convert the "cyclic genes" of `Revelio` from Hs to Mm
Revelio::revelioTestData_cyclicGenes %>%
lapply(function(genes) {
merge(
x = data.frame(symbol.x = genes),
y = homology(taxid$hs, taxid$mm),
by = "symbol.x",
all.x = TRUE
) %>%
pull(symbol.y) %>%
sort(na.last = TRUE)
})
default_cyclic_genes %>%
as.data.frame() %>%
head() %>%
rbind("...") %>%
kable()
| G1.S | S | G2 | G2.M | M.G1 |
|---|---|---|---|---|
| Abca7 | a | 1700001L19Rik | Adh4 | 1700102P08Rik |
| Acd | Abcc2 | 1700066M21Rik | Ahi1 | 2700049A03Rik |
| Acyp1 | Abcc5 | Alkbh1 | Akirin2 | Afap1 |
| Adamts1 | Abhd10 | Anln | Ankrd40 | Agfg1 |
| Adck2 | Adam22 | Ap3d1 | Anln | Agpat3 |
| Adcy6 | Arhgap42 | Arhgap19 | Arhgap19 | Akap13 |
| … | … | … | … | … |
The DC1-DC2 plane is supposed to show a circle reflecting the cell cycle (cf. Fig 1 in Schwabe et al., 2020).
cell_cycle <- function(sce, batch.by = NA, cyclic_genes = default_cyclic_genes) {
list(
counts = sce@assays@data$counts,
orig.id = colnames(sce),
batch = if (is.na(batch.by)) "cell" else paste0(sce@colData[[batch.by]], "_")
) %>%
within({
colnames(counts) <- paste0(batch, 1:ncol(counts))
orig.id <- data.frame(old = orig.id, new = colnames(counts)) %>% pull(old, new)
}) %>%
with({
counts %>%
Revelio::createRevelioObject(cyclicGenes = cyclic_genes, lowernGeneCutoff = 0) %>%
Revelio::getCellCyclePhaseAssignInformation() %>%
Revelio::getPCAData() %>%
Revelio::getOptimalRotation() %>%
list(cyclic = .) %>%
with({
assertthat::assert_that(all(
rownames(cyclic@cellInfo) == colnames(cyclic@transformedData$pca$data)
))
cbind(
cyclic@transformedData$pca$data %>% `[`(1:min(5, nrow(.)), ) %>% t(),
cyclic@transformedData$dc$data %>% `[`(1:min(5, nrow(.)), ) %>% t(),
cyclic@cellInfo
)
}) %>%
data.frame(row.names = orig.id[rownames(.)])
}) %>%
cbind(as.data.frame(sce@colData)[rownames(.), , drop = FALSE])
}
cell_cycle_plot2d <- function(cco, colors = ramp(cco$ccPhase)) {
cco %>%
ggplot(aes(x = DC1, y = DC2, color = ccPhase)) +
geom_point(size = 3, stroke = 1) +
scale_color_manual(values = colors, breaks = gglast(colour))
}
cco_revelio <-
cell_cycle(
Revelio::revelioTestData_rawDataMatrix %>%
make_sce(
counts = .,
coldata = data.frame(
x = colnames(.) %>% strsplit("_") %>% dual(rbind),
row.names = colnames(.)
) %>% select(batch = x.1)
),
batch.by = "batch",
cyclic_genes = Revelio::revelioTestData_cyclicGenes
)
## 2021-07-07 19:35:51: calculating optimal rotation: 2021-07-07 19:35:51: calculating PCA: 2021-07-07 19:35:51: assigning cell cycle phases: 2021-07-07 19:35:51: reading data: 6.27secs
## 35.64secs
## 1.12mins
## 1.64mins
cco_revelio %>%
ggplot(aes(x = DC1, y = DC2, color = ccPhase, shape = batch)) +
geom_point(size = 3, stroke = 1) +
scale_color_manual(values = colors, breaks = gglast(colour))

cco_mock <-
unlist(as.list(default_cyclic_genes)) %>%
{
(.)[!is.na(.)]
} %>%
mock_sce(ncells = 33) %>%
cell_cycle()
## 2021-07-07 19:37:30: calculating optimal rotation: 2021-07-07 19:37:30: calculating PCA: 2021-07-07 19:37:30: assigning cell cycle phases: 2021-07-07 19:37:30: reading data: 0.02secs
## 0.55secs
## 0.62secs
## 48.46secs
cco_mock %>%
ggplot(aes(x = DC1, y = DC2, color = celltype, fill = ccPhase)) +
geom_point(shape = 21, size = 5) +
ggplot2::scale_color_manual(values = colors, breaks = gglast(colour)) +
ggplot2::scale_fill_manual(values = colors, breaks = gglast(fill))

cell_cycle_plot3d <- function(cco, colors = ramp(cco$ccPhase)) {
cco %>%
with({
plotly::plot_ly() %>%
plotly::add_markers(
x = DC1, y = DC2, z = DC3,
size = 4,
color = ccPhase,
colors = colors
)
})
}
cco_mock %>% cell_cycle_plot3d()
cco_mock %>%
with({
rgl::plot3d(DC1, DC2, DC3, size = 3, type = "s", col = ramp(ccPhase)[ccPhase])
rgl::rglwidget()
})
deseq <- function(sce, design) {
sce %>%
DESeq2::DESeqDataSet(design = design) %>%
DESeq2::estimateSizeFactors(type = "poscounts") %>%
DESeq2::DESeq(quiet = TRUE, fitType = "local") %>%
DESeq2::results() %>%
as.data.frame() %>%
select(base = baseMean, lfc = log2FoldChange, p = pvalue) %>%
mutate(base = log1p(base)) %>%
tidyr::drop_na() %>%
mutate(p = p.adjust(p, "BH"), p.unadj = p) %>%
ind2col("symbol") %>%
list(table = ., design = design)
}
edger <- function(sce, design) {
sce %>%
edgeR::estimateDisp(robust = TRUE) %>%
# edgeR::calcNormFactors(method = "TMM") %>%
# edgeR::estimateGLMTagwiseDisp(design = design) %>%
edgeR::glmFit(design = model.matrix(design, data = sce@colData)) %>%
edgeR::glmLRT(coef = 2) %>%
`$`(table) %>%
select(base = logCPM, lfc = logFC, p = PValue, LR = LR) %>%
tidyr::drop_na() %>%
mutate(p = p.adjust(p, "BH"), p.unadj = p) %>%
ind2col("symbol") %>%
list(table = ., design = design)
}
# Hacky fctn to determine colors for `lfc > 0` and `lfc < 0`
lfc_colors <- function(sce, design, colors) {
f <- terms(design) %>% attr("factors")
stopifnot((nrow(f) == 1) && (ncol(f) == 1))
f <- sce@colData[[f %>% colnames()]]
cc <- colors[as.character(f %>% levels())] # TODO: need `as.character`?
cc <- c(cc[[1]], (rowMeans(col2rgb(cc[-1])) / 255) %>% as.list() %>% dual(rgb))
(. %>% (colorRamp(cc)) %>% RGB())
}
compute_de <- function(sce, design) {
list(
deseq = suppressMessages(deseq(sce, design)),
edger = suppressMessages(edger(sce, design))
) %>%
with({
list(
table = rbind(
deseq$table %>%
select(symbol, base, lfc, p) %>%
mutate(src = "DESeq2"),
edger$table %>%
select(symbol, base, lfc, p) %>%
mutate(src = "edgeR")
),
design = design
)
}) %>%
within({
# Attach LFC color (not very important)
table %<>%
group_by(src) %>%
mutate(color = (lfc_colors(sce, design, colors))(percent_rank(lfc))) %>%
ungroup()
})
}
sce_dummy.de <- sce_dummy %>% compute_de(~group)
sce_dummy.de$table %>%
head() %>%
rbind("...") %>%
kable()
| symbol | base | lfc | p | src | color |
|---|---|---|---|---|---|
| Fos | 1.94433786752507 | 1.03952791369199 | 0.0350644319817966 | DESeq2 | #C98144FF |
| Fosb | 1.32188433643544 | 0.392699195334891 | 0.94357785439112 | DESeq2 | #A09B36FF |
| Junb | 1.71655787188708 | 0.618371363877933 | 0.403770457436529 | DESeq2 | #B0913BFF |
| Spp1 | 0.685515786871741 | 1.75726212594778 | 0.121724809299487 | DESeq2 | #E4704DFF |
| Col6a1 | 0.644138066638724 | 0.767376344078733 | 0.324044869548652 | DESeq2 | #B68D3DFF |
| Col1a1 | 2.86899094589606 | 1.87748518409641 | 1.75462356248164e-09 | DESeq2 | #E86E4EFF |
| … | … | … | … | … | … |
show_de_table <- function(.) {
(.) %>%
group_by(src) %>%
slice_min(p, n = 777) %>%
slice_max(abs(lfc), n = 777) %>%
ungroup() %>%
merge(
y = org.Mm.eg.db::org.Mm.egSYMBOL2EG %>% as.data.frame(),
all.x = TRUE,
by = "symbol"
) %>%
merge(
y = gene.summary$mm %>% select(gene_id, description, summary),
all.x = TRUE,
by = "gene_id"
) %>%
select(-any_of("gene_id")) %>%
select(-any_of("color")) %>%
# Role: TF, Rec, Lig, etc
mutate(role = symbol2role(symbol)) %>%
mutate(symbol = as.link(symbol)) %>%
mutate(description = if_else(is.na(summary), description, paste0("<details>", "<summary>", description, "</summary>", summary, "</details>"))) %>%
select(-summary) %>%
mutate(description = paste0("<span style='font-size: 70%;'>", description, "</span>")) %>%
DT::datatable(escape = FALSE) %>%
DT::formatSignif(c("base", "lfc", "p"), digits = 3)
}
sce_dummy.de$table %>%
group_by(src) %>%
slice_min(p, n = 33) %>%
ungroup() %>%
show_de_table()
show_de_heatmap <- function(sce, de_table, n = 33, ...) {
top <- de_table %>%
group_by(lfc > 0) %>%
slice_min(p, n = n, with_ties = FALSE) %>%
ungroup() %>%
arrange(lfc) %>%
select(symbol, lfc, p) %>%
mutate(`-log10(p)` = -log10(p)) %>%
by_col1() %>%
`[`(rownames(.) %in% rownames(sce), )
sce[rownames(top), ] %>%
heatmap(anno_row = top, ...)
}
sce_dummy %>%
show_de_heatmap(sce_dummy.de$table %>% filter(src == "edgeR"), colors = colors)

show_de_volcano <- function(de_table) {
de_table %<>%
mutate(goi_set = symbol2goiset(symbol)) %>%
group_by(src) %>%
arrange(lfc) %>%
mutate(is_out = (lfc <= max(head(lfc, 5))) | (min(tail(lfc, 5)) <= lfc)) %>%
ungroup() %>%
mutate(label = ifelse(is_out, symbol, ""))
ggplot(de_table, aes(x = lfc, y = -log10(p))) +
# All genes
geom_point(color = "gray") +
# Color by geneset
geom_point(
data = de_table %>% filter(goi_set != "Other"),
aes(x = lfc, y = -log10(p), color = goi_set)
) +
scale_color_manual(values = ramp(de_table$goi_set)) +
labs(color = "Gene set") +
# Label outliers
geom_text(aes(label = label), hjust = -0.3, position = position_jitter(h = 4), size = 3, color = "blue") +
facet_grid(. ~ src, scale = "free") +
theme(legend.position = "bottom")
}
sce_dummy.de$table %>% show_de_volcano()

show_de_basecamp <- function(.) {
(.) %>%
mutate(goi_set = symbol2goiset(symbol)) %>%
group_by(goi_set) %>%
arrange(lfc) %>%
mutate(is_out = (lfc <= max(head(lfc, 5))) | (min(tail(lfc, 5)) <= lfc)) %>%
mutate(is_out = is_out | (min(tail(sort(base), 5)) <= base)) %>%
ungroup() %>%
mutate(label = ifelse(is_out, symbol, "")) %>%
ggplot(aes(x = lfc, y = base)) +
geom_point(data = (.), alpha = 0.5, size = 1, color = "grey") +
geom_point(color = "black", size = 0.5) +
scale_y_log10() +
geom_text(aes(label = label), hjust = -0.3, position = position_jitter(h = 0.01), size = 2, color = "blue") +
facet_wrap(~goi_set, scale = "free_y") +
# Note: put labs(x = "..") outside
labs(y = "Expression") +
theme(
legend.position = "bottom",
axis.text.y = element_blank(),
axis.ticks.y = element_blank(),
)
}
sce_dummy.de$table %>%
show_de_basecamp() +
labs(x = "Condition1 <-- LFC --> Condition2")

show_de_cmp_lfc <- function(table) {
merge(
table %>% filter(src == "DESeq2"),
table %>% filter(src == "edgeR"),
by = "symbol",
all = TRUE
) %>%
# tidyr::replace_na(list(p.x = 0, p.y = 0)) %>%
tidyr::drop_na() %>%
mutate(sig.x = (p.x < quantile(p.x, 0.01))) %>%
mutate(sig.y = (p.y < quantile(p.y, 0.01))) %>%
mutate(sig = if_else(sig.x & sig.y, "Intersection", if_else(sig.x, "DESeq", if_else(sig.y, "edgeR", "")))) %>%
mutate(label = if_else(sig.x | sig.y, symbol, "")) %>%
{
ggplot((.) %>% filter(sig != ""), aes(x = lfc.x, y = lfc.y)) +
geom_point(data = (.) %>% filter(sig == ""), stroke = 0, alpha = 0.2) +
geom_point(aes(color = sig)) +
labs(x = "LFC by DESeq2", y = "LFC by edgeR", color = "Top %1 signif.") +
geom_text(aes(label = label, color = sig), hjust = -0.3, position = position_jitter(h = 0.05), size = 3, alpha = 0.8, show.legend = FALSE) +
geom_abline(slope = 1, intercept = 0, linetype = "dashed")
}
}
sce_dummy.de$table %>% show_de_cmp_lfc()

compute_go <- function(de_table, n = 33) {
de_table %>%
mutate(direction = if_else(lfc > 0, "up", "dn")) %>%
group_by(direction) %>%
slice_min(p, n = n, with_ties = TRUE) %>%
slice_max(abs(lfc), n = n, with_ties = FALSE) %>%
split(.$direction) %>%
within({
f <- (function(x) x$symbol[1:5] %>% paste(collapse = ", "))
message(paste("Up:", f(up), "..."))
message(paste("Dn:", f(dn), "..."))
rm(f)
}) %>%
lapply(function(de_table) {
de_table %>%
# Get the Entrez IDs of our genes
mutate(gene_id = symbol2geneid(symbol)) %>%
pull(gene_id) %>%
# GO enrichment analysis
limma::goana(species = "Mm") %>%
ind2col("category") %>%
rename(p = P.DE, `#DE` = DE, `#cat` = N, GO = Ont, term = Term) %>%
# Limit selection
filter(p <= 0.05) %>%
arrange(p)
})
}
show_go_table <- function(.) {
(.) %>%
with({
rbind(
up %>% mutate(direction = "lfc_up"),
dn %>% mutate(direction = "lfc_down")
) %>%
arrange(p)
}) %>%
mutate(category = if_else(grepl("^GO:", category), as.link(category), category)) %>%
mutate(category = paste0("<span style='white-space:nowrap;'>", category, "(", GO, ")", "</span>")) %>%
mutate(direction = if_else(grepl("lfc_u", direction), paste0("<span style='color:darkred'>", direction, "</span>"), direction)) %>%
mutate(direction = if_else(grepl("lfc_d", direction), paste0("<span style='color:darkgreen'>", direction, "</span>"), direction)) %>%
dplyr::select(any_of(c("category", "direction", "p", "#DE", "#cat", "term"))) %>%
DT::datatable(escape = FALSE, width = "100%") %>%
DT::formatSignif("p", digits = 3)
}
sce_dummy.go <-
sce_dummy.de$table %>%
compute_go()
## Up: Col4a2, Col4a1, Ier3, Col18a1, Col8a1 ...
## Dn: Col9a2, Pdhb, Col9a2, Ddit4, Col4a6 ...
rbind(
sce_dummy.go$up %>% head(2),
sce_dummy.go$dn %>% head(2),
"..."
) %>%
DT::datatable(escape = FALSE, width = "100%")
sce_dummy.go %>% show_go_table()
go_info <- list(
tax = list(mm = path_to("*/*/*/*_mouse_sym2cat.txt.gz") %>% read.csv(sep = "\t")),
obo = path_to("*/*/*/obo_go.txt.gz") %>% read.csv(sep = "\t")
)
go_info$tax$mm %>%
head() %>%
kable()
| symbol | goid |
|---|---|
| 0610009B22Rik | GO:0003674 |
| 0610009B22Rik | GO:0006888 |
| 0610009B22Rik | GO:0048471 |
| 0610009B22Rik | GO:0005634 |
| 0610009B22Rik | GO:0005737 |
| 0610009B22Rik | GO:0030008 |
go_info$obo %>%
head() %>%
kable()
| goid | term | namespace |
|---|---|---|
| GO:0000001 | mitochondrion inheritance | biological_process |
| GO:0000002 | mitochondrial genome maintenance | biological_process |
| GO:0000003 | reproduction | biological_process |
| GO:0000006 | high-affinity zinc transmembrane transporter activity | molecular_function |
| GO:0000007 | low-affinity zinc ion transmembrane transporter activity | molecular_function |
| GO:0000009 | alpha-1,6-mannosyltransferase activity | molecular_function |
go2symbols <- function(go_term) {
with(
go_info,
merge(tax$mm, filter(obo, stringr::str_detect(term, go_term)), by = "goid") %>%
pull(symbol) %>%
unique()
)
}
# example: "inflammatory"
list(
sce = sce_dummy,
genes = go2symbols("infla"),
table = sce_dummy.de$table %>% filter(src == "edgeR")
) %>%
with(
sce %>%
.[, order(.$subgroup)] %>%
show_de_heatmap(table %>% filter(symbol %in% genes), colors = colors, cluster_cols = FALSE)
)

# If A and B are SingleCellExperiment-like, then `cosine` computes
# the cosine similarity of samples of A against samples of B
cosine <- function(A, B) {
A <- A@assays@data$counts
B <- B@assays@data$counts
ii <- intersect(rownames(A), rownames(B))
A <- t(as.matrix(A[ii, , drop = FALSE]))
B <- t(as.matrix(B[ii, , drop = FALSE]))
A <- A / sqrt(rowSums(A * A))
B <- B / sqrt(rowSums(B * B))
A %*% t(B)
}
Example:
cosine(
sce_dummy[, sce_dummy$subgroup == "healthy1"],
sce_dummy[, sce_dummy$subgroup == "healthy1"]
) %>%
pheatmap::pheatmap()

coex <- function(sce) {
# Filter for the correlation matrix
f <- function(.) ((.) != 0)
sce %>%
SingleCellExperiment::counts() %>%
t() %>%
# stats::cor(method = "spearman") %>%
Hmisc::rcorr(type = "spearman") %>%
`$`(r) %>%
{
with(
(upper.tri(.) * (.)) %>% f() %>% which(arr.ind = TRUE) %>% as.data.frame(),
data.frame(a = rownames(.)[row], b = colnames(.)[col], w = (.)[cbind(row, col)])
)
}
}
show_coex_graph <- function(coex_df, de_table, n = 77) {
coex_df %<>% as.data.frame()
# Note: the graph vertices are pulled from the de_table
# Only those occurring in coex_df$a or .$b are retained
stopifnot(!any(duplicated(de_table$symbol)))
# Color of vertex labels
# de_table %>% pull(color, symbol)
stopifnot(all(c("a", "b", "w") %in% colnames(coex_df)))
# Symbol -> LFC map
node_lfc <- de_table %>% pull(lfc, symbol)
# Attach edge color
coex_df %<>%
group_by(w > 0) %>%
mutate(color = ((1 + w / max(abs(w))) / 2)) %>%
ungroup() %>%
mutate(color = colorRamp(c("blue", "yellow", "red"))(color) %>% RGB(alpha = 0.2))
# Co-expression graph
ex_graph <-
coex_df %>%
# Take top `n` co-expression edges at both extremes
group_by(w > 0) %>%
slice_max(abs(w), n = n) %>%
ungroup() %>%
select(a, b, w, color) %>%
igraph::graph_from_data_frame(
directed = FALSE,
vertices = (
de_table %>%
filter((symbol %in% coex_df$a) | (symbol %in% coex_df$b)) %>%
select(name = symbol, color)
)
)
graphs <- list(
# Proto-graphs based on OmniPath
tf = omnipath.tf,
lr = omnipath.lr,
ia = omnipath.in %>%
anti_join(omnipath.tf, by = c("source", "target")) %>%
anti_join(omnipath.lr, by = c("source", "target"))
) %>%
# Convert them to `igraph` objects
lapply(function(df) {
df %>%
mutate(W = is_stimulation - is_inhibition) %>%
select(a = source_genesymbol, b = target_genesymbol, W) %>%
filter(a %in% igraph::V(ex_graph)$name) %>%
filter(b %in% igraph::V(ex_graph)$name) %>%
group_by(a, b) %>%
summarize(W = sum(W), .groups = "keep") %>%
filter(W != 0) %>%
# Attach co-expression `w` and edge color
merge(coex_df %>% select(a, b, w, color), by = c("a", "b")) %>%
# Ensure correct order
select(a, b, w, W, color) %>%
igraph::graph_from_data_frame(directed = TRUE)
}) %>%
within({
# Attach the co-expression graph
ex <- ex_graph
# For the co-expression graph, by definition:
# `W` is positive iff both LFC are of the same sign
edges <- ex %>% igraph::as_data_frame("edges")
igraph::E(ex)$W <- sign(node_lfc[edges$from] * node_lfc[edges$to])
rm(edges)
}) %>%
lapply(function(g) {
igraph::E(g)$coherent <- (sign(igraph::E(g)$w) == sign(igraph::E(g)$W))
return(g)
})
rm(ex_graph)
# for layouting
set.seed(41)
graph_layout <-
(
graphs$ex
# + igraph::as.undirected(graphs$tf)
# + igraph::as.undirected(graphs$lr)
+ igraph::as.undirected(graphs$ia)
) %>%
# igraph::layout_with_kk(dim = 2, kkconst = igraph::vcount(.)) %>%
igraph::layout_with_dh(maxiter = 22) %>%
# igraph::layout_with_lgl() %>%
# igraph::layout_with_fr(start.temp = 1e-1) %>%
as.data.frame() %>%
magrittr::set_colnames(c("x", "y")) %>%
magrittr::set_rownames(igraph::V(graphs$ex)$name)
graphs %<>%
lapply(function(graph) {
list(graph = graph, pos = graph_layout) %>% within({
# Edges as dataframe
edges <-
graph %>%
igraph::get.edgelist() %>%
magrittr::set_colnames(c("a", "b")) %>%
as.data.frame()
# Attach segment data
segment <-
cbind(
take_rows(pos, edges$a) %>% magrittr::set_colnames(c("x", "y")),
take_rows(pos, edges$b) %>% magrittr::set_colnames(c("xend", "yend"))
) %>%
mutate(
x = x + (xend - x) * 0.02, xend = xend - (xend - x) * 0.03,
y = y + (yend - y) * 0.02, yend = yend - (yend - y) * 0.03
)
})
})
list(
slide1 = list(
alpha = list(ex = 2e-2, tf = 0.77, lr = 3e-2, ia = 3e-2),
title = "TF -> target"
),
slide2 = list(
alpha = list(ex = 2e-2, tf = 3e-2, lr = 0.77, ia = 3e-2),
title = "Ligand -> receptor"
),
slide3 = list(
alpha = list(ex = 2e-2, tf = 3e-2, lr = 3e-2, ia = 0.77),
title = "More protein-protein interaction"
),
slide4 = list(
alpha = list(ex = 0.55, tf = 3e-2, lr = 3e-2, ia = 3e-2),
title = "Co-expression"
)
) %>% lapply(function(slide) {
p <- ggplot()
p <- p +
# Co-expression segments
geom_segment(
data = graphs$ex$segment,
aes(x = x, y = y, xend = xend, yend = yend),
color = igraph::E(graphs$ex$graph)$color,
alpha = slide$alpha$ex,
size = 0.1,
linetype = if_else(igraph::E(graphs$ex$graph)$coherent, "solid", "dashed")
)
for (x in c("tf", "lr", "ia")) {
E <- igraph::E(graphs[[x]]$graph)
p <- p +
geom_curve(
data = graphs[[x]]$segment,
lineend = "butt", # linejoin = "mitre",
aes(x = x, y = y, xend = xend, yend = yend),
arrow = arrow(
length = unit(0.007, "npc"),
# activation / inhibition arrow:
angle = if_else(E$W > 0, 10, if_else(E$W < 0, 90, 0))
),
color = E$color,
alpha = slide$alpha[[x]],
size = 0.2,
curvature = 0.05,
linetype = if_else(E$coherent, "solid", "dashed")
)
}
p <- p +
# Labels
geom_text(
data = graph_layout,
aes(x = x, y = y, label = igraph::V(graphs$ex$graph)$name),
size = 2,
color = igraph::V(graphs$ex$graph)$color
)
p <- p +
theme_void() +
ggplot2::ggtitle(slide$title) +
theme(plot.title = element_text(hjust = 0.95, vjust = -3, size = 10))
return(p)
})
# graph %>%
# igraph::plot.igraph(
# edge.width = 0.7,
# edge.color = igraph::E(.)$color,
#
# vertex.size = 0,
# vertex.shape = 'none',
# #vertex.color = "black",
# vertex.label.cex = 0.4,
# vertex.label.color = vcolor[igraph::V(.)$name],
#
# layout = graph_layout
# )
}
sce_dummy %>%
`[`(, (.)$group == "healthy") %>%
take_rows(get_goi_genes()) %>%
coex() %>%
show_coex_graph(sce_dummy.de$table %>% filter(src == "edgeR"))




requireNamespace("visNetwork")
## Loading required namespace: visNetwork
omnipath.tf %>%
mutate(a = source_genesymbol, b = target_genesymbol) %>%
filter(a %in% get_goi_genes(), b %in% get_goi_genes()) %>%
select(a, b) %>%
igraph::graph_from_data_frame(directed = FALSE) %>%
visNetwork::toVisNetworkData() %>%
{
visNetwork::visNetwork(nodes = .$nodes, edges = .$edges, layout = "layout_with_kk") %>%
visNetwork::visPhysics(enabled = FALSE)
}
# scenic_options <-
# SCENIC::initializeScenic(org = "mgi", dbDir = path_to("data/SCENIC/*cache"), dbs = SCENIC::defaultDbNames[["mgi"]], datasetTitle = "dummy", nCores = 1)
# sce_dummy@assays@data$counts %>%
# list(counts = .) %>% with({
# counts %>%
# `[`(SCENIC::geneFiltering(., scenicOptions = scenic_options), ) %>%
# #SCENIC::runCorrelation(scenic_options) %>%
# log1p() %>%
# SCENIC::runGenie3(scenic_options)
# })
#
#
# `%dopar%` <- foreach::`%dopar%`
# foreach <- foreach::foreach
# scenic_options %>%
# SCENIC::runSCENIC_1_coexNetwork2modules() %>%
# SCENIC::runSCENIC_2_createRegulons() %>%
# SCENIC::runSCENIC_3_scoreCells(log1p(sce_dummy@assays@data$counts))
Remove unused.
rm(cco_revelio)
gene.summary$hs <- NULL
gc()
## used (Mb) gc trigger (Mb) max used (Mb)
## Ncells 9409199 502.6 16430580 877.5 16430580 877.5
## Vcells 25140465 191.9 40708301 310.6 38944817 297.2
RAM usage.
culprits() %>%
head(10) %>%
kable()
| size | cumsize | unit | |
|---|---|---|---|
| omnipath.tf | 66.00 | 170.0 | Mb |
| omnipath.in | 41.00 | 110.0 | Mb |
| gene.summary | 39.00 | 66.0 | Mb |
| go_info | 16.00 | 28.0 | Mb |
| omnipath.lr | 4.10 | 12.0 | Mb |
| show_coex_graph | 1.60 | 8.2 | Mb |
| sce_dummy | 0.52 | 6.6 | Mb |
| sce_dummy.go | 0.48 | 6.1 | Mb |
| goi_agg_by_group | 0.45 | 5.6 | Mb |
| heatmap | 0.37 | 5.1 | Mb |
# Collection of dataset loaders
load_dataset <- list()
# Summarizes each column in `cols`
# Call with results='asis'
tables <- function(sce, cols) {
for (col in cols) {
sce@colData %>%
as.data.frame() %>%
pull(col) %>%
table(useNA = "ifany") %>%
as.data.frame() %>%
rename_with(~col, ".") %>%
rename_with(~"#", Freq) %>%
t() %>%
kable() %>%
print()
# https://bookdown.org/yihui/rmarkdown-cookbook/kable.html#generate-multiple-tables-from-a-for-loop
# https://stackoverflow.com/questions/52631689/how-to-show-formatted-r-output-with-results-asis-in-rmarkdown
}
}
“Mouse Whole Cortex and Hippocampus 10x” dataset from Allen Brain, 2020: [explore], [download], [protocols].
The dataset contains ~1M cells. We have selected ~31k cells of type “Astro”, “Endo” and “VLMC”.
load_dataset$ab <- function(max.size = 9999) {
sce <-
make_sce(
counts = (
path_to("*/AllenBrain/Mouse-WCH*/*fewer_cells/data.*.gz") %>%
from_tsv() %>%
`[`(, sort(colnames(.)))
),
coldata = (
path_to("*/AllenBrain/Mouse-WCH*/f*cells/meta.*.gz") %>%
from_tsv() %>%
`[`(sort(rownames(.)), )
)
) %>%
# Only keep VLMC
`[`(, (.)$subclass_label == "VLMC") %>%
# Select a random subset of cells for expediency
`[`(, sample(colnames(.), size = min(ncol(.), max.size))) %>%
# Drop non-expressed cells
`[`(, (.)@assays@data$counts %>% colSums() > 0) %>%
# Order by cell type: sometimes convenient below, esp. for plotting
`[`(, order((.)$cell_type_alias_label))
# conversion to matrix takes long, hence do it here
sce@assays@data$counts %<>% as.matrix()
# alias `celltype` column
sce@colData$celltype <- sce@colData$cell_type_alias_label
sce@colData$celltype %<>% as.factor() %>% relevel(ref = "375_VLMC")
return(sce)
}
GSE98816: single cell RNA-seq of mouse brain vascular transcriptomes.
load_dataset$bh <- function(max.size = 9999) {
sce <-
make_sce(
counts = path_to("*/Betsholtz-2018/*/expr.*.gz") %>%
from_tsv() %>%
t(),
coldata = path_to("*/Betsholtz-2018/*/meta.*.gz") %>% from_tsv()
) %>%
# Only keep FB
`[`(, (.)$celltype %in% c("FB1", "FB2")) %>%
# Select a random subset of cells for expediency
`[`(, sample(colnames(.), size = min(ncol(.), max.size))) %>%
# Drop non-expressed cells
`[`(, (.)@assays@data$counts %>% colSums() > 0) %>%
# Order by cell type
`[`(, order((.)$celltype))
sce@assays@data$counts %<>% as.matrix()
sce@colData$celltype %<>% as.factor() %>% relevel(ref = "FB1")
return(sce)
}
Raw data from GSE113973 (last update: Nov 13, 2019), metadata from UCSC cell browser.
load_dataset$cb <- function(max.size = 9999) {
sce <-
make_sce(
counts = path_to("*/CasteloBranco-2018/*/expr.*.gz") %>%
from_tsv() %>%
t(),
coldata = path_to("*/CasteloBranco-2018/*/meta.*.gz") %>%
from_tsv()
) %>%
# Only keep those cells
`[`(, (.)$celltype %in% c("VLMC1", "VLMC2", "VLMC3", "VLMC4")) %>%
# Select a random subset of cells for expediency
`[`(, sample(colnames(.), size = min(ncol(.), max.size))) %>%
# Drop non-expressed cells
`[`(, (.)@assays@data$counts %>% colSums() > 0) %>%
# Order by cell type
`[`(, order((.)$celltype))
sce@assays@data$counts %<>% as.matrix()
sce@colData$Group %<>% as.factor() %>% relevel(ref = "Ctrl")
sce@colData$celltype %<>% as.factor() %>% relevel(ref = "VLMC3")
return(sce)
}
Expression data from GSE135186 (last update: Feb 10, 2021). The cell-to-cluster assignment is a private communication by D. Aran (2021-05-20). The associated paper is Dorrier-Aran-…-Daneman, 2021.
load_dataset$dm <- function(max.size = 9999) {
sce <-
make_sce(
counts = path_to("*/Daneman-2020/*/expr.*.gz") %>%
from_tsv() %>%
t(),
coldata = path_to("*/Daneman-2020/*/meta.*.gz") %>%
from_tsv()
) %>%
# Select a random subset of cells for expediency
`[`(, sample(colnames(.), size = min(ncol(.), max.size))) %>%
# Drop non-expressed cells (there shouldn't be any here)
`[`(, (.)@assays@data$counts %>% colSums() > 0) %>%
# Order samples
`[`(, order((.)$cluster))
# Drop cells without cluster (they separate in dim. red. plots)
sce %<>% `[`(, !is.na((.)$cluster))
sce@assays@data$counts %<>% as.matrix()
# Set factor reference level
sce@colData$group %<>% as.factor() %>% relevel(ref = "healthy")
sce@colData$cluster %<>% c() %>%
as.factor() %>%
relevel(ref = "0")
# Alias for consistency
sce@colData$celltype <- sce@colData$cluster
return(sce)
}
datasets <- load_dataset %>% lapply(\(loader) memo(loader)(max.size = 777))
datasets <- datasets %>% lapply(scater_attach)
# memo: `%<>%` doesn't seem to work with cache=TRUE
gridExtra::grid.arrange(
datasets$ab %>% scater_show(color.by = "celltype", colors = colors),
datasets$ab %>% scater_show(color.by = "donor_sex_label"),
ncol = 2
)

gridExtra::grid.arrange(
datasets$bh %>% scater_show(color.by = "celltype", colors = colors),
datasets$bh %>% scater_show(color.by = "Mouse ID"),
ncol = 2
)

gridExtra::grid.arrange(
datasets$cb %>% scater_show(color.by = "celltype", colors = colors),
datasets$cb %>% scater_show(color.by = "Group", colors = colors),
ncol = 2
)

gridExtra::grid.arrange(
datasets$cb %>% scater_show(color.by = "MouseModel", colors = colors),
datasets$cb %>% scater_show(color.by = "Plate", colors = colors),
ncol = 2
)

gridExtra::grid.arrange(
datasets$dm %>% scater_show(color.by = "subgroup", colors = colors),
datasets$dm %>% scater_show(color.by = "cluster", colors = colors),
ncol = 2
)

Remove unused.
gc()
## used (Mb) gc trigger (Mb) max used (Mb)
## Ncells 9415402 502.9 16430580 877.5 16430580 877.5
## Vcells 25148597 191.9 65973551 503.4 53352283 407.1
RAM usage.
culprits() %>%
head(10) %>%
kable()
| size | cumsize | unit | |
|---|---|---|---|
| datasets | 470.00 | 640.0 | Mb |
| omnipath.tf | 66.00 | 170.0 | Mb |
| omnipath.in | 41.00 | 110.0 | Mb |
| gene.summary | 39.00 | 66.0 | Mb |
| go_info | 16.00 | 28.0 | Mb |
| omnipath.lr | 4.10 | 12.0 | Mb |
| show_coex_graph | 1.60 | 8.3 | Mb |
| sce_dummy | 0.52 | 6.7 | Mb |
| sce_dummy.go | 0.48 | 6.2 | Mb |
| goi_agg_by_group | 0.45 | 5.7 | Mb |
The following is an approximation of Extended Data Fig 4c from Dorrier-Aran-…-Daneman, 2021.
We check whether other datasets (besides “Daneman”) cluster by their cell types using the same set of genes.
The samples are presorted by “Daneman” cluster and by condition within each cluster.
datasets$dm %>%
{
# Remove the `celltype` columns (same as `cluster`)
.@colData %<>% `[`(, colnames(.) != "celltype", drop = FALSE)
# Order samples
.[, order(paste(.$cluster, .$subgroup))]
} %>%
show_dm_heatmap(colors = colors, cluster_cols = FALSE)

The samples are clustered.
datasets$ab %>% show_dm_heatmap(colors = colors, cluster_cols = TRUE)

The samples are clustered.
datasets$bh %>% show_dm_heatmap(colors = colors, cluster_cols = TRUE)

The samples are clustered.
datasets$cb %>% show_dm_heatmap(colors = colors, cluster_cols = TRUE)

cco <- list(
ab = datasets$ab %>%
cell_cycle(batch.by = "celltype"),
bh = datasets$bh %>%
cell_cycle(batch.by = "celltype"),
cb = datasets$cb %>%
cell_cycle(batch.by = "celltype"),
dm = datasets$dm %>%
`[`(, (.)$subgroup %in% c("healthy1", "healthy2")) %>%
cell_cycle(batch.by = "subgroup")
)
## 2021-07-07 19:44:22: calculating optimal rotation: 2021-07-07 19:44:22: calculating PCA: 2021-07-07 19:44:22: assigning cell cycle phases: 2021-07-07 19:44:22: reading data: 0.39secs
## 2.26secs
## 3.88secs
## 24.42secs
## 2021-07-07 19:44:46: calculating optimal rotation: 2021-07-07 19:44:46: calculating PCA: 2021-07-07 19:44:46: assigning cell cycle phases: 2021-07-07 19:44:46: reading data: 0.2secs
## 2.02secs
## 2.39mins
## 2.67mins
## 2021-07-07 19:47:26: calculating optimal rotation: 2021-07-07 19:47:26: calculating PCA: 2021-07-07 19:47:26: assigning cell cycle phases: 2021-07-07 19:47:26: reading data: 0.34secs
## 2.4secs
## 2.19mins
## 2.36mins
## 2021-07-07 19:49:48: calculating optimal rotation: 2021-07-07 19:49:48: calculating PCA: 2021-07-07 19:49:48: assigning cell cycle phases: 2021-07-07 19:49:48: reading data: 1.78secs
## 5.04secs
## 6.66secs
## 14.79secs
cco$ab %>% cell_cycle_plot2d(colors = colors)

cco$bh %>% cell_cycle_plot2d(colors = colors)

cco$cb %>% cell_cycle_plot2d(colors = colors)

cco$dm %>% cell_cycle_plot2d(colors = colors)

datasets$ab %>% show_goi_pca(goi$pvf$genes, group.by = "celltype", colors = colors)

datasets$bh %>% show_goi_pca(goi$pvf$genes, group.by = "celltype", colors = colors)

datasets$cb %>% show_goi_pca(goi$pvf$genes, group.by = "Group", colors = colors)

datasets$cb %>% show_goi_pca(goi$pvf$genes, group.by = "celltype", colors = colors)

datasets$dm %>% show_goi_pca(goi$pvf$genes, group.by = "subgroup", colors = colors)

datasets$dm %>% show_goi_pca(goi$pvf$genes, group.by = "cluster")

Click on the plot to scroll through the comparisons (shift-click to cycle back).
pairs.to.contrast <-
list(
list(A = goi$col$name, B = goi$air$name),
list(A = goi$glycolysis$name, B = goi$air$name),
list(A = goi$fib_mig$name, B = goi$air$name)
)
show_ab_density <- function(sce, group.by = "celltype", colors = ramp(sce@colData[[group.by]])) {
stopifnot(
all(sce@colData[[group.by]] %in% names(colors))
)
sce %<>%
scater::aggregateAcrossFeatures(
ids = symbol2goiset(rownames(.)),
average = TRUE,
use.assay.type = "logcounts"
)
pairs.to.contrast %>%
lapply(dual(function(A, B) {
data.frame(
a = sce@assays@data$logcounts[A, ],
b = sce@assays@data$logcounts[B, ],
g = sce@colData[[group.by]]
) %>% {
(.) %>%
ggplot(aes(x = (a - b), color = g)) +
labs(x = paste0(" - ", B, " + ", A)) +
labs(y = "Fraction of cells (i.e., density)") +
labs(color = group.by) +
scale_color_manual(values = colors, breaks = gglast(colour)) +
#ggtitle(paste0(A, " vs ", B)) +
geom_density(size = 3)
}
}))
}
show_ab_2d <- function(sce, group.by = "celltype", colors = ramp(sce@colData[[group.by]])) {
stopifnot(
all(sce@colData[[group.by]] %in% names(colors))
)
# Group -- for color
g <- sce@colData[[group.by]]
# Library size -- for size
s <- sce@assays@data$counts %>% colSums()
sce %<>%
scater::aggregateAcrossFeatures(
ids = symbol2goiset(rownames(.)),
average = TRUE,
use.assay.type = "logcounts"
)
pairs.to.contrast %>%
lapply(dual(function(A, B) {
data.frame(
x = sce@assays@data$logcounts[A, ],
y = sce@assays@data$logcounts[B, ],
g = g,
s = s
) %>% {
(.) %>%
ggplot(aes(x = x, y = y, color = g, size = s)) +
labs(x = A, y = B, color = group.by, size = "Library size") +
scale_color_manual(values = colors, breaks = gglast(colour)) +
geom_point(alpha = 0.6) +
#ggtitle(paste0(A, " vs ", B)) +
guides(shape = FALSE) +
geom_density_2d(size = 0.3, alpha = 0.5) +
guides(color = guide_legend(override.aes = list(alpha = 1)))
}
}))
}
datasets$ab %>% show_ab_density(group.by = "celltype", colors = colors)



datasets$ab %>% show_ab_2d(group.by = "celltype", colors = colors)
## now dyn.load("/usr/lib/R/library/MASS/libs/MASS.so") ...
## now dyn.load("/home/ra/R/x86_64-pc-linux-gnu-library/4.1/isoband/libs/isoband.so") ...



datasets$bh %>% show_ab_density(group.by = "celltype", colors = colors)



datasets$bh %>% show_ab_2d(group.by = "celltype", colors = colors)



datasets$cb %>% show_ab_density(group.by = "celltype", colors = colors)



datasets$cb %>% show_ab_2d(group.by = "celltype", colors = colors)



datasets$dm %>% show_ab_density(group.by = "cluster")



datasets$dm %>% show_ab_2d(group.by = "cluster")



datasets$dm %>% show_ab_density(group.by = "subgroup", colors = colors)



datasets$dm %>% show_ab_2d(group.by = "subgroup", colors = colors)



de <- list(
# Other/375_VLMC
ab_other_vs_375 = datasets$ab %>% compute_de(~celltype),
# FB2/FB1:
bh_fb2_vs_fb1 = datasets$bh %>% compute_de(~celltype),
# EAE/Ctrl (exclude the VLMC3 cells because they separate on PCA):
cb_eae_vs_ctrl = datasets$cb %>% `[`(, (.)$celltype != "VLMC3") %>% compute_de(~Group),
# Other/VLMC3:
cb_other_vs_vlmc3 = datasets$cb %>% compute_de(~celltype),
# EAE/Healthy:
dm_eae_vs_hthy = datasets$dm %>% compute_de(~group)
)
show_heatmap_slides <- function(sce, de_table) {
genesets <- list(
list(name = "Neuron projection", genes = go2symbols("neuron projection")),
list(name = "Cell adhesion", genes = go2symbols("cell adhesion")),
list(name = "Fibroblast growth factor", genes = go2symbols("fibroblast growth factor")),
list(name = "Smooth muscle", genes = go2symbols("smooth muscle")),
list(name = "Cell communication", genes = go2symbols("cell communication")),
list(name = "Chemotaxis", genes = go2symbols("chemotaxis")),
list(name = "Amyloid", genes = go2symbols("amyloid")),
list(name = "Insulin", genes = go2symbols("insulin")),
list(name = "Actin filament", genes = go2symbols("actin filament")),
list(name = "Apoptosis", genes = go2symbols("apoptosis")),
goi$fib_mig,
list(name = "Wounding", genes = go2symbols("wounding")),
list(name = "Inflammatory", genes = go2symbols("inflammatory")),
list(name = "Immune response", genes = go2symbols("immune response")),
goi$neuroinflammation,
goi$air,
goi$glycolysis,
goi$col,
list(name = "Cytokine activity", genes = go2symbols("^cytokine activity")),
list(name = "Chemokine activity", genes = go2symbols("^chemokine activity")),
goi$eae_common_up,
list(name = "Most-DE genes", genes = de_table$symbol, n = 33)
)
# genesets <- goi[c("glycolysis", "fib_mig")] # DEBUG
# default max number of genes to show
n <- 47
genesets %>% lapply(function(geneset) {
geneset %>%
with({
sub_de_table <- de_table %>% filter(symbol %in% genes)
if (!(any(rownames(sce) %in% sub_de_table$symbol))) {
return()
}
show_de_heatmap(
sce,
sub_de_table,
colors = colors,
cluster_cols = FALSE,
n = n,
main = name
)
}) %>%
magrittr::use_series(gtable)
})
}
show_heatmap_slides(
sce_dummy %>% .[, order(.$subgroup)],
sce_dummy.de$table %>% filter(src == "edgeR")
)
de$ab_other_vs_375$table %>% show_de_table()
Click on the image to cycle through the sets (shift-click to cycle back).
show_heatmap_slides(
datasets$ab %>% .[, order(.$celltype)],
de$ab_other_vs_375$table %>% filter(src == "edgeR")
)






















de$ab_other_vs_375$table %>%
filter(src == "edgeR") %>%
show_de_basecamp() +
labs(x = "Up in 375_VLMC <-- LFC --> Up in Other")

de$ab_other_vs_375$table %>%
show_de_cmp_lfc()

de$ab_other_vs_375$table %>%
show_de_volcano() +
labs(x = "Up in 375_VLMC <-- LFC --> Up in Other", color = "Geneset")

de$bh_fb2_vs_fb1$table %>% show_de_table()
Click on the image to cycle through the sets.
show_heatmap_slides(
datasets$bh %>% .[, order(.$celltype)],
de$bh_fb2_vs_fb1$table %>% filter(src == "edgeR")
)






















de$bh_fb2_vs_fb1$table %>%
filter(src == "edgeR") %>%
show_de_basecamp() +
labs(x = "Up in FB1 <-- LFC --> Up in FB2")

de$bh_fb2_vs_fb1$table %>%
show_de_cmp_lfc()

de$bh_fb2_vs_fb1$table %>%
show_de_volcano() +
labs(x = "Up in FB1 <-- LFC --> Up in FB2", color = "Geneset")

de$cb_other_vs_vlmc3$table %>% show_de_table()
Click on the image to cycle through the sets.
show_heatmap_slides(
datasets$cb %>% .[, order(.$celltype)],
de$cb_other_vs_vlmc3$table %>% filter(src == "edgeR")
)






















de$cb_other_vs_vlmc3$table %>%
filter(src == "edgeR") %>%
show_de_basecamp() +
labs(x = "Up in VLMC3 <-- LFC --> Up in Other")

de$cb_other_vs_vlmc3$table %>%
show_de_cmp_lfc()

de$cb_other_vs_vlmc3$table %>%
show_de_volcano() +
labs(x = "Up in VLMC3 <-- LFC --> Up in Other", color = "Geneset")

de$cb_eae_vs_ctrl$table %>% show_de_table()
Click on the image to cycle through the sets.
show_heatmap_slides(
datasets$cb %>% .[, .$celltype != "VLMC3"] %>% .[, order(.$Group)],
de$cb_eae_vs_ctrl$table %>% filter(src == "edgeR")
)






















de$cb_eae_vs_ctrl$table %>%
filter(src == "edgeR") %>%
show_de_basecamp() +
labs(x = "Up in Ctrl <-- LFC --> Up in EAE")

de$cb_eae_vs_ctrl$table %>%
show_de_cmp_lfc()

de$cb_eae_vs_ctrl$table %>%
show_de_volcano() +
labs(x = "Up in Ctrl <-- LFC --> Up in EAE", color = "Geneset")

de$dm_eae_vs_hthy$table %>% show_de_table()
Click on the image to cycle through the sets.
datasets$dm %>%
{
# Remove the `celltype` columns (same as `cluster`)
.@colData %<>% `[`(, colnames(.) != "celltype", drop = FALSE)
# Order samples
.[, order(.$subgroup)]
} %>%
show_heatmap_slides(
de$dm_eae_vs_hthy$table %>% filter(src == "edgeR")
)






















de$dm_eae_vs_hthy$table %>%
filter(src == "edgeR") %>%
show_de_basecamp() +
labs(x = "Healthy <-- LFC --> EAE")

de$dm_eae_vs_hthy$table %>%
show_de_cmp_lfc()

de$dm_eae_vs_hthy$table %>%
show_de_volcano() +
labs(x = "Healthy <-- LFC --> EAE", color = "Geneset")

Use DE results from EdgeR.
go <-
list(n33 = 33, n77 = 77, n150 = 150) %>% lapply(function(n) {
de %>% lapply(function(de_result) {
de_result$table %>%
filter(src == "edgeR") %>%
compute_go(n = n)
})
})
go$n33$ab_other_vs_375 %>% show_go_table()
go$n33$bh_fb2_vs_fb1 %>% show_go_table()
go$n33$cb_other_vs_vlmc3 %>% show_go_table()
go$n33$cb_eae_vs_ctrl %>% show_go_table()
go$n33$dm_eae_vs_hthy %>% show_go_table()
The straight edges are the top negative (blue) and positive (red) gene-gene Spearman correlations among the “genes of interest” within each sub-dataset. The nodes are colored by condition.
The arched edges are TF-target, ligand-receptor, protein-protein interactions from OmniPath. The color is from co-expression (yellow = neutral). The line is dashed if the observed co-expression contradicts the expected effect, e.g. inhibition is expected but positive co-expression is observed.
Click on the figure to change the view; alt-click to enlarge.
datasets$ab %>%
take_rows(get_goi_genes()) %>%
coex() %>%
show_coex_graph(de$ab_other_vs_375$table %>% filter(src == "edgeR"))




datasets$ab %>%
take_rows(get_goi_genes()) %>%
`[`(, .$celltype == "374_VLMC") %>%
coex() %>%
show_coex_graph(de$ab_other_vs_375$table %>% filter(src == "edgeR"))




datasets$ab %>%
take_rows(get_goi_genes()) %>%
`[`(, .$celltype == "375_VLMC") %>%
coex() %>%
show_coex_graph(de$ab_other_vs_375$table %>% filter(src == "edgeR"))




datasets$ab %>%
take_rows(get_goi_genes()) %>%
`[`(, .$celltype == "376_VLMC") %>%
coex() %>%
show_coex_graph(de$ab_other_vs_375$table %>% filter(src == "edgeR"))




datasets$bh %>%
take_rows(get_goi_genes()) %>%
coex() %>%
show_coex_graph(de$bh_fb2_vs_fb1$table %>% filter(src == "edgeR"))




datasets$bh %>%
take_rows(get_goi_genes()) %>%
`[`(, .$celltype == "FB1") %>%
coex() %>%
show_coex_graph(de$bh_fb2_vs_fb1$table %>% filter(src == "edgeR"))




datasets$bh %>%
take_rows(get_goi_genes()) %>%
`[`(, .$celltype == "FB2") %>%
coex() %>%
show_coex_graph(de$bh_fb2_vs_fb1$table %>% filter(src == "edgeR"))




datasets$cb %>%
take_rows(get_goi_genes()) %>%
coex() %>%
show_coex_graph(de$cb_eae_vs_ctrl$table %>% filter(src == "edgeR"))




datasets$cb %>%
take_rows(get_goi_genes()) %>%
`[`(, .$Group == "Ctrl") %>%
coex() %>%
show_coex_graph(de$cb_eae_vs_ctrl$table %>% filter(src == "edgeR"))




datasets$cb %>%
take_rows(get_goi_genes()) %>%
`[`(, .$Group == "EAE") %>%
coex() %>%
show_coex_graph(de$cb_eae_vs_ctrl$table %>% filter(src == "edgeR"))




datasets$dm %>%
take_rows(get_goi_genes()) %>%
coex() %>%
show_coex_graph(de$dm_eae_vs_hthy$table %>% filter(src == "edgeR"))




datasets$dm %>%
take_rows(get_goi_genes()) %>%
`[`(, .$subgroup %in% c("healthy1", "healthy2")) %>%
coex() %>%
show_coex_graph(de$dm_eae_vs_hthy$table %>% filter(src == "edgeR"))




datasets$dm %>%
take_rows(get_goi_genes()) %>%
`[`(, .$subgroup == "healthy3") %>%
coex() %>%
show_coex_graph(de$dm_eae_vs_hthy$table %>% filter(src == "edgeR"))




datasets$dm %>%
take_rows(get_goi_genes()) %>%
`[`(, .$subgroup == "EAE1") %>%
coex() %>%
show_coex_graph(de$dm_eae_vs_hthy$table %>% filter(src == "edgeR"))




datasets$dm %>%
take_rows(get_goi_genes()) %>%
`[`(, .$subgroup == "EAE2") %>%
coex() %>%
show_coex_graph(de$dm_eae_vs_hthy$table %>% filter(src == "edgeR"))




Clean up.
gc()
## used (Mb) gc trigger (Mb) max used (Mb)
## Ncells 9457667 505.1 16430580 877.5 16430580 877.5
## Vcells 143595861 1095.6 293287409 2237.7 293269036 2237.5
RAM usage.
culprits() %>%
head(10) %>%
kable()
| size | cumsize | unit | |
|---|---|---|---|
| datasets | 470.00 | 670.0 | Mb |
| omnipath.tf | 66.00 | 200.0 | Mb |
| omnipath.in | 41.00 | 130.0 | Mb |
| gene.summary | 39.00 | 91.0 | Mb |
| de | 17.00 | 52.0 | Mb |
| go_info | 16.00 | 35.0 | Mb |
| go | 5.90 | 20.0 | Mb |
| omnipath.lr | 4.10 | 14.0 | Mb |
| show_coex_graph | 1.60 | 9.7 | Mb |
| sce_dummy | 0.52 | 8.1 | Mb |
de_tables <- list(
a = de$ab_other_vs_375$table %>% mutate(name = "AB: Other/375_VLMC"),
b = de$bh_fb2_vs_fb1$table %>% mutate(name = "Bh: FB2/FB1"),
c1 = de$cb_other_vs_vlmc3$table %>% mutate(name = "CB: Other/VLMC3"),
c2 = de$cb_eae_vs_ctrl$table %>% mutate(name = "CB': EAE/Ctrl"),
d = de$dm_eae_vs_hthy$table %>% mutate(name = "Dm: EAE/Healthy")
)
de_tables %>%
dual(rbind) %>%
slice_sample(n = 7)
## # A tibble: 7 x 7
## symbol base lfc p src color name
## * <chr> <dbl> <dbl> <dbl> <chr> <chr> <chr>
## 1 Sdhaf2 9.32 0.0982 1 edgeR #C88243FF Dm: EAE/Healthy
## 2 Kcns1 0.00772 -0.00442 0.999 DESeq2 #56AC2BFF AB: Other/375_VLMC
## 3 Egf 9.58 -0.233 1 edgeR #49940BFF AB: Other/375_VLMC
## 4 Mcoln2 2.02 -6.20 0.315 DESeq2 #CD5B45FF CB: Other/VLMC3
## 5 Bcl2 3.74 -0.0926 1 DESeq2 #B07633FF CB: Other/VLMC3
## 6 Krtap29-1 9.27 0 1 edgeR #35DE12FF Dm: EAE/Healthy
## 7 Kdm5c 0.101 0.363 1.00 DESeq2 #B38F3CFF Dm: EAE/Healthy
datasets$combo <-
list(
ab = data.frame(
datasets$ab@assays@data$counts %>% take_rows(get_goi_genes()) %>% t(),
group = "AB",
subgroup = datasets$ab$celltype
),
bh = data.frame(
datasets$bh@assays@data$counts %>% take_rows(get_goi_genes()) %>% t(),
group = "Bh",
subgroup = datasets$bh$celltype
),
cb = data.frame(
datasets$cb@assays@data$counts %>% take_rows(get_goi_genes()) %>% t(),
group = "CB",
subgroup = datasets$cb$Group
),
dm = data.frame(
datasets$dm@assays@data$counts %>% take_rows(get_goi_genes()) %>% t(),
group = "Dm",
subgroup = datasets$dm$subgroup
) %>% {
(.)[sample(rownames(.), size = min(nrow(.), 777)), ]
}
) %>%
lapply(function(ds) {
ds[, Reduce(intersect, lapply(., colnames))]
}) %>%
dual(rbind) %>%
{
make_sce(
(.) %>% select(-group, -subgroup) %>% t(),
(.) %>% select(group, subgroup)
)
}
print(datasets$combo)
## class: SingleCellExperiment
## dim: 326 1017
## metadata(0):
## assays(1): counts
## rownames(326): Fos Fosb ... Tpm2 Actb
## rowData names(0):
## colnames(1017): ab.CAGAATCTCTGGGCCA-L8TX_181011_01_A03
## ab.CGTCAGGTCTGTCTAT-L8TX_180115_01_G11 ... dm.EAE2_AGCGTCGAGAGGTTGC
## dm.EAE2_GTCTCGTTCCGTCATC
## colData names(2): group subgroup
## reducedDimNames(0):
## mainExpName: NULL
## altExpNames(0):
Compare the gene LFC (edgeR) across datasets. Each dot is a gene with x-coordinate as the LFC in one dataset and y-coordinate as the LFC in another.
de_tables %>%
lapply(function(t) {
t %>%
group_by(name, src) %>%
mutate(lfc = (lfc - mean(lfc)) / sd(lfc)) %>%
ungroup() %>%
filter(src == "edgeR")
}) %>%
with({
rbind(
merge(a, b, by = "symbol"),
merge(a, c1, by = "symbol"),
merge(a, c2, by = "symbol"),
merge(a, d, by = "symbol"),
merge(b, c1, by = "symbol"),
merge(b, c2, by = "symbol"),
merge(b, d, by = "symbol"),
merge(c1, d, by = "symbol"),
merge(c2, d, by = "symbol")
)
}) %>%
mutate(goi_set = symbol2goiset(symbol)) %>%
{
ggplot(., aes(x = lfc.x, y = lfc.y)) +
# Background
geom_point(alpha = 0.05, color = "black", size = 0.7) +
geom_smooth(formula = y ~ x, method = "lm", size = 0.2, color = "gray") +
# Foreground
geom_point(
data = filter(., goi_set != "Other"),
aes(color = goi_set),
stroke = 0, size = 1, alpha = 0.7
) +
# geom_smooth(data = filter(., goi_set != "Other"), formula = y ~ x, method = 'lm', size = 0.2) +
labs(x = "LFC", y = "LFC", color = "") +
# scale_color_brewer(palette = "Set1") +
facet_grid(
cols = vars(name.x), rows = vars(name.y),
scale = "free"
) +
theme(
legend.position = "bottom",
axis.text = element_blank(),
axis.ticks = element_blank(),
strip.text = element_text(size = 10),
axis.title = element_blank()
) +
guides(color = guide_legend(override.aes = list(size = 3, alpha = 1)))
}

The following table shows the top genes that are DE (one way or another according to edgeR) simultaneously in several datasets.
make_common_de_table <- function(de_tables, slicer = slice_max) {
de_tables %>%
lapply(function(t) {
t %>%
group_by(name, src) %>%
mutate(norm.lfc = ((lfc - mean(lfc)) / sd(lfc))) %>%
slicer(abs(norm.lfc), n = 777) %>%
ungroup() %>%
filter(src == "edgeR")
}) %>%
dual(rbind) %>%
as.data.frame() %>%
{
# print(head(.))
.
} %>%
select(symbol, norm.lfc, name) %>%
stats::reshape(direction = "wide", idvar = "symbol", timevar = "name") %>%
# Commonly-DE first
arrange(desc(rowSums(!is.na(across(where(is.numeric)))))) %>%
# Signature of NAs
rowwise() %>%
mutate(signature = paste(across(where(is.numeric), ~ ifelse(is.na(.x), "N", "Y")), collapse = "")) %>%
ungroup() %>%
# Role: TF, Rec, Lig, etc
mutate(role = symbol2role(symbol)) %>%
# Reset rownames
as.data.frame(row.names = 1:nrow(.)) %>%
# Formatting
mutate(symbol = as.link(symbol)) %>%
mutate(across(where(is.numeric), signif, 3)) %>%
DT::datatable(escape = FALSE, rownames = TRUE)
}
de_tables %>% make_common_de_table(slicer = slice_max)
datasets$grouped <-
list(
list(ds = datasets$ab, name = "AB", group.by = "celltype"),
list(ds = datasets$bh, name = "Bh", group.by = "celltype"),
list(ds = datasets$cb, name = "CB", group.by = "celltype"),
list(ds = datasets$cb, name = "CB", group.by = "Group"),
list(ds = datasets$dm, name = "Dm", group.by = "cluster"),
list(ds = datasets$dm, name = "Dm", group.by = "subgroup")
) %>%
lapply(dual(
function(ds, name, group.by) {
ds %>%
# Subset genes
take_rows(
unique(goi %>% lapply(as.data.frame) %>% dual(rbind) %>% pull(genes))
) %>%
scuttle::aggregateAcrossCells(ids = ds[[group.by]], use.assay.type = "logcounts", statistics = "mean") %>%
SingleCellExperiment::logcounts() %>%
as.data.frame() %>%
ind2col("symbol") %>%
reshape2::melt(id.vars = "symbol", variable.name = "subgroup", value.name = "expression") %>%
mutate(dataset = name, subdataset = paste0(name, " (", group.by, ")"), group = group.by) %>%
mutate(role = symbol2role(symbol))
# TODO: add description?
}
)) %>%
dual(rbind)
interesting <- c(
# Daneman clusters
c("Rbp1", "Fn1", "Igfbp7"),
# Aerobic
c("Cox3b", "Cox7c", "Bloc1s1"),
# Anaerobic glycolysis
c("Ier3"),
# Collagen
c("Col4a1", "Col4a2", "Col8a1", "Col3a1"),
# Fib. migration
c("Sdc4")
)
show_stack <- function(ds) {
ds$role
ds %>%
mutate(role = if_else(role == "", "Unknown", role)) %>%
group_by(subdataset) %>%
mutate(expression = 100 * expression / max(expression)) %>%
ungroup() %>%
mutate(
symbol = if_else(symbol %in% interesting, glue("<span style='color:red;'>{symbol}</span>"), symbol)
) %>%
mutate(color = c(ramp(0:7), colors)[as.character(subgroup)]) %>%
mutate(subgroup = glue("<span name='{subgroup}' style='color:{color};'>{subgroup}</span>")) %>%
# Append role to symbol
# mutate(symbol = paste0(symbol, " (", role, ")")) %>%
# Heatmaps
ggplot(aes(x = subgroup, y = symbol, fill = expression)) +
geom_tile() +
scale_y_discrete(limits = rev) +
scale_fill_gradient(low = "black", high = "yellow") +
labs(fill = "%") +
theme(
axis.text.x = ggtext::element_markdown(
size = 12, angle = 90, vjust = 0.5, hjust = 1
),
axis.title = element_blank(),
legend.title = element_text(size = 9),
legend.text = element_text(size = 9),
# Remove the `role` strip altogether
# strip.text.y = element_blank(),
strip.text.y = element_text(color = "black", angle = 0, size = 8),
strip.text.x = element_text(color = "black", size = 10),
) +
facet_grid(
cols = vars(subdataset),
rows = vars(role),
space = "free",
scales = "free",
labeller = label_wrap_gen(5)
) +
theme(axis.text.y = ggtext::element_markdown())
}
(0:7) %>%
lapply(function(i) {
datasets$grouped %>%
filter(symbol %in% goi[[paste0("c", i)]]$genes) %>%
show_stack() +
ggtitle(paste0("Cluster #", i)) +
theme(axis.text.y = ggtext::element_markdown(size = 12))
})








datasets$grouped %>%
filter(symbol %in% goi$neuroinflammation$genes) %>%
show_stack() +
theme(axis.text.y = ggtext::element_markdown(size = 12))

datasets$grouped %>%
filter(symbol %in% goi$col$genes) %>%
show_stack() +
theme(axis.text.y = ggtext::element_markdown(size = 9))

datasets$grouped %>%
filter(symbol %in% goi$air$genes) %>%
show_stack() +
theme(axis.text.y = ggtext::element_markdown(size = 7))

datasets$grouped %>%
filter(symbol %in% goi$glycolysis$genes) %>%
show_stack() +
theme(axis.text.y = ggtext::element_markdown(size = 6))

datasets$grouped %>%
filter(symbol %in% goi$fib_mig$genes) %>%
show_stack() +
theme(axis.text.y = ggtext::element_markdown(size = 10))

For some of the gene sets, the following tabs show reduced dimension plots (PCA/TSNE/UMAP) once the combined dataset is restricted to that gene set.
colors # required below
## 374_VLMC 375_VLMC 376_VLMC FB1
## "aquamarine3" "chartreuse4" "chartreuse3" "cornflowerblue"
## FB2 Ctrl EAE VLMC1
## "blue" "chartreuse4" "coral2" "chartreuse1"
## VLMC3 VLMC4 healthy EAE1
## "coral3" "coral4" "green" "coral1"
## EAE2 healthy1 healthy2 healthy3
## "coral3" "aquamarine" "chartreuse3" "chartreuse4"
## positive negative G1.S S
## "Red4" "Steelblue1" "#E16A86" "#B88A00"
## G2 G2.M M.G1
## "#50A315" "#00AD9A" "#009ADE"
common_map <- function(sce, dimred = NULL) {
sce %<>% drop_empty()
if (is.null(dimred)) {
sce %<>% norm1() %>% scater_attach()
return(list(
PCA = common_map(sce, dimred = "PCA"),
TSNE = common_map(sce, dimred = "TSNE"),
UMAP = common_map(sce, dimred = "UMAP")
))
}
sce %>%
{
cbind(
.@colData %>% as.data.frame(),
list(
TSNE = .@int_colData$reducedDims$TSNE %>%
as.data.frame() %>%
select(x = V1, y = V2),
UMAP = .@int_colData$reducedDims$UMAP %>%
as.data.frame() %>%
select(x = V1, y = V2),
PCA = .@int_colData$reducedDims$PCA %>%
as.data.frame() %>%
select(x = PC1, y = PC2)
)[[dimred]]
)
} %>%
group_by(group) %>%
arrange(subgroup) %>%
mutate(rk = as.factor(dense_rank(subgroup))) %>%
ungroup() %>%
mutate(color = colors[as.character(subgroup)]) %>%
mutate(subgroup = paste0(group, ": ", subgroup)) %>%
{
ggplot((.), aes(x = x, y = y, color = subgroup)) +
geom_point(data = ((.) %>% select(-group)), alpha = 0.05, size = 1) +
geom_point(alpha = 0.5, size = 1.5) +
facet_wrap(. ~ group) +
scale_color_manual(values = (select(., color, subgroup) %>% pull(color, subgroup))) +
ggplot2::ggtitle(dimred) +
theme(
axis.title = element_blank(),
axis.ticks = element_blank(),
axis.text = element_blank(),
plot.title = element_text(),
) +
guides(color = guide_legend(override.aes = list(size = 3, alpha = 1)))
}
}
common_map_3d <- function(sce, color.by = "subgroup") {
sce %<>%
drop_empty() %>%
norm1() %>%
scater::logNormCounts() %>%
scater::runPCA()
data.frame(
color_group = sce@colData[[color.by]],
lib.size = sce@assays@data$counts %>% colSums(),
SingleCellExperiment::reducedDim(sce, "PCA") %>%
as.data.frame() %>%
select(PC1, PC2, PC3)
) %>%
group_by(color_group) %>%
sample(size = 33, replace = TRUE) %>%
ungroup() %>%
# plotly::plot_ly(type = "scatter3d", mode = "markers",
plotly::plot_ly() %>%
plotly::add_markers(
x = ~PC1, y = ~PC2, z = ~PC3,
marker = list(size = 2, opacity = 1),
color = ~color_group, colors = colors
) %>%
plotly::layout(
scene = list(
xaxis = list(title = "PC1", showticklabels = FALSE),
yaxis = list(title = "PC2", showticklabels = FALSE),
zaxis = list(title = "PC3", showticklabels = FALSE)
)
)
}
datasets$combo %>% common_map()



datasets$combo %>% common_map_3d()
geneset <- goi$neuroinflammation$genes
datasets$combo %>%
take_rows(geneset) %>%
common_map()



datasets$combo %>%
take_rows(geneset) %>%
common_map_3d()
geneset <- goi$col$genes
datasets$combo %>%
take_rows(geneset) %>%
common_map()



datasets$combo %>%
take_rows(geneset) %>%
common_map_3d()
geneset <- goi$air$genes
datasets$combo %>%
take_rows(geneset) %>%
common_map()



datasets$combo %>%
take_rows(geneset) %>%
common_map_3d()
geneset <- goi$glycolysis$genes
datasets$combo %>%
take_rows(geneset) %>%
common_map()



datasets$combo %>%
take_rows(geneset) %>%
common_map_3d()
geneset <- goi$fib_mig$genes
datasets$combo %>%
take_rows(geneset) %>%
common_map()



datasets$combo %>%
take_rows(geneset) %>%
common_map_3d()
For each dataset we compare the most-DE genes to those from the kidney fibroblast activation gene set SI7 (see above). The LFCs are first standardized and filtered to top absolute LFC.
de_tables_vs_si7 <-
de_tables %>%
dual(rbind) %>%
group_by(name, src) %>%
mutate(norm.lfc = (lfc - mean(lfc)) / sd(lfc)) %>%
filter(abs(norm.lfc) >= quantile(abs(norm.lfc), 0.95)) %>%
ungroup() %>%
merge(
y = fb_genes_si7 %>%
select(lfc = logFC) %>%
ind2col("symbol") %>%
mutate(norm.lfc = (lfc - mean(lfc)) / sd(lfc)) %>%
filter(abs(norm.lfc) >= quantile(abs(norm.lfc), 0.95)) %>%
rename(norm.lfc.ref = norm.lfc),
by = "symbol",
all = TRUE
) %>%
mutate(delta = norm.lfc - norm.lfc.ref)
We only keep the LFC from edgeR.
de_tables_vs_si7 %>%
tidyr::drop_na() %>%
filter(src == "edgeR") %>%
group_by(name, src) %>%
mutate(is_out = FALSE) %>%
mutate(
is_out = is_out |
symbol %in% (slice_min(., norm.lfc, n = 5) %>% pull(symbol)) |
symbol %in% (slice_max(., norm.lfc, n = 5) %>% pull(symbol))
) %>%
mutate(
is_out = is_out |
symbol %in% (slice_min(., norm.lfc.ref, n = 5) %>% pull(symbol)) |
symbol %in% (slice_max(., norm.lfc.ref, n = 5) %>% pull(symbol))
) %>%
mutate(label = if_else(is_out, symbol, "")) %>%
ungroup() %>%
ggplot(aes(x = norm.lfc, y = norm.lfc.ref, color = name)) +
ggrepel::geom_text_repel(
aes(label = label),
hjust = 1.1, size = 3, show.legend = FALSE,
max.overlaps = Inf, point.size = NA,
segment.alpha = 0.2
) +
geom_point(size = 1, alpha = 0.3) +
geom_hline(yintercept = 0, alpha = 0.2) +
geom_vline(xintercept = 0, alpha = 0.2) +
labs(x = "Standardized LFC in Dataset", y = "Standardized LFC in Reference", color = "Dataset") +
scale_color_brewer(palette = "Set1") +
theme(
legend.title = element_text(size = 9),
legend.text = element_text(size = 9),
legend.position = "bottom"
) +
guides(color = guide_legend(override.aes = list(alpha = 1, size = 3)))

The genes that have a very different LFC (between two conditions within a dataset) from the set SI7 (see data sources) are the one with the most positive or most negative “delta”.
de_tables_vs_si7 %>%
tidyr::drop_na() %>%
select(symbol, dataset = name, src, norm.lfc, norm.lfc.ref, delta) %>%
mutate(symbol = as.link(symbol)) %>%
DT::datatable(escape = FALSE)
# Genes DE in EAE but not in AB dataset
Clean up.
gc()
## used (Mb) gc trigger (Mb) max used (Mb)
## Ncells 9518377 508.4 16430580 877.5 16430580 877.5
## Vcells 148458775 1132.7 293287409 2237.7 293273136 2237.5
RAM usage.
culprits() %>%
head(10) %>%
kable()
| size | cumsize | unit | |
|---|---|---|---|
| datasets | 470.0 | 690 | Mb |
| omnipath.tf | 66.0 | 220 | Mb |
| omnipath.in | 41.0 | 150 | Mb |
| gene.summary | 39.0 | 110 | Mb |
| de_tables | 19.0 | 73 | Mb |
| de | 17.0 | 54 | Mb |
| go_info | 16.0 | 37 | Mb |
| go | 5.9 | 22 | Mb |
| omnipath.lr | 4.1 | 16 | Mb |
| show_coex_graph | 1.6 | 12 | Mb |